[interp] Move from stack based to fully local var based design (#46037)
authormonojenkins <jo.shields+jenkins@xamarin.com>
Fri, 8 Jan 2021 11:55:18 +0000 (06:55 -0500)
committerGitHub <noreply@github.com>
Fri, 8 Jan 2021 11:55:18 +0000 (13:55 +0200)
Instead of having instructions that push to and pop from the stack, every instruction now has an explicit destination (dreg) and explicit sources (sregs).

While the purpose of this PR is mainly to make it easier to implement more advanced optimizations in the future, it also has noticeable performance implications. The code is simplified because we no longer need to update and save the SP. However, the code for each instruction is bloated due to the addition of explicit source and destination offsets. This is counteracted by a reduction in the total number of instructions, since ldloc/stloc and moves become redundant and are mostly optimized away, even at this stage of the implementation. The total numbers of executed opcodes when running the corlib test suite with the interp are listed at https://gist.github.com/BrzVlad/d62f504930b75cba4b870e6dbd947e90.

Co-authored-by: BrzVlad <BrzVlad@users.noreply.github.com>
src/mono/mono/mini/interp/interp-internals.h
src/mono/mono/mini/interp/interp.c
src/mono/mono/mini/interp/mintops.c
src/mono/mono/mini/interp/mintops.def
src/mono/mono/mini/interp/mintops.h
src/mono/mono/mini/interp/transform.c
src/mono/mono/mini/interp/transform.h
src/mono/mono/mini/interp/whitebox.c

index 69fcc40..6e24075 100644 (file)
@@ -127,7 +127,9 @@ struct InterpMethod {
        MonoJitInfo *jinfo;
        MonoDomain *domain;
 
+       // This doesn't include the size of stack locals
        guint32 total_locals_size;
+       // The size of locals that map to the execution stack
        guint32 stack_size;
        guint32 alloca_size;
        int num_clauses; // clauses
@@ -185,8 +187,6 @@ typedef struct FrameClauseArgs FrameClauseArgs;
 
 /* State of the interpreter main loop */
 typedef struct {
-       stackval *sp;
-       unsigned char *vt_sp;
        const unsigned short  *ip;
 } InterpState;
 
index 24cc42c..f4075f2 100644 (file)
@@ -221,11 +221,11 @@ frame_data_allocator_pop (FrameDataAllocator *stack, InterpFrame *frame)
  *   Reinitialize a frame.
  */
 static void
-reinit_frame (InterpFrame *frame, InterpFrame *parent, InterpMethod *imethod, stackval *sp)
+reinit_frame (InterpFrame *frame, InterpFrame *parent, InterpMethod *imethod, gpointer stack)
 {
        frame->parent = parent;
        frame->imethod = imethod;
-       frame->stack = sp;
+       frame->stack = (stackval*)stack;
        frame->state.ip = NULL;
 }
 
@@ -1433,10 +1433,9 @@ interp_frame_arg_to_data (MonoInterpFrameHandle frame, MonoMethodSignature *sig,
        InterpFrame *iframe = (InterpFrame*)frame;
        InterpMethod *imethod = iframe->imethod;
 
-       // If index == 1, we finished executing an InterpFrame, thus we always have imethod set,
-       // and the result is at the bottom of the execution stack.
+       // If index == -1, we finished executing an InterpFrame and the result is at the bottom of the stack.
        if (index == -1)
-               stackval_to_data (sig->ret, STACK_ADD_BYTES (iframe->stack, imethod->total_locals_size), data, TRUE);
+               stackval_to_data (sig->ret, iframe->stack, data, TRUE);
        else if (sig->hasthis && index == 0)
                *(gpointer*)data = iframe->stack->data.p;
        else
@@ -1465,7 +1464,7 @@ interp_frame_arg_to_storage (MonoInterpFrameHandle frame, MonoMethodSignature *s
        InterpMethod *imethod = iframe->imethod;
 
        if (index == -1)
-               return STACK_ADD_BYTES (iframe->stack, imethod->total_locals_size);
+               return iframe->stack;
        else
                return STACK_ADD_BYTES (iframe->stack, get_arg_offset (imethod, sig, index));
 }
@@ -1498,7 +1497,7 @@ interp_to_native_trampoline (gpointer addr, gpointer ccontext)
 #ifdef _MSC_VER
 #pragma optimize ("", off)
 #endif
-static MONO_NO_OPTIMIZATION MONO_NEVER_INLINE void
+static MONO_NO_OPTIMIZATION MONO_NEVER_INLINE gpointer
 ves_pinvoke_method (
        InterpMethod *imethod,
        MonoMethodSignature *sig,
@@ -1596,7 +1595,7 @@ ves_pinvoke_method (
 #endif
        goto exit_pinvoke; // prevent unused label warning in some configurations
 exit_pinvoke:
-       return;
+       return NULL;
 }
 #ifdef _MSC_VER
 #pragma optimize ("", on)
@@ -1691,7 +1690,7 @@ interp_delegate_ctor (MonoObjectHandle this_obj, MonoObjectHandle target, gpoint
  */
 #ifndef ENABLE_NETCORE
 static MONO_NEVER_INLINE MonoException*
-ves_imethod (InterpFrame *frame, MonoMethod *method, MonoMethodSignature *sig, stackval *sp, stackval *retval)
+ves_imethod (InterpFrame *frame, MonoMethod *method, MonoMethodSignature *sig, stackval *sp)
 {
        const char *name = method->name;
        mono_class_init_internal (method->klass);
@@ -1699,11 +1698,11 @@ ves_imethod (InterpFrame *frame, MonoMethod *method, MonoMethodSignature *sig, s
        if (method->klass == mono_defaults.array_class) {
                if (!strcmp (name, "UnsafeMov")) {
                        /* TODO: layout checks */
-                       stackval_from_data (sig->ret, retval, (char*) sp, FALSE);
+                       stackval_from_data (sig->ret, sp, (char*) sp, FALSE);
                        return NULL;
                }
                if (!strcmp (name, "UnsafeLoad"))
-                       return ves_array_get (frame, sp, retval, sig, FALSE);
+                       return ves_array_get (frame, sp, sp, sig, FALSE);
        }
        
        g_error ("Don't know how to exec runtime method %s.%s::%s", 
@@ -1713,22 +1712,6 @@ ves_imethod (InterpFrame *frame, MonoMethod *method, MonoMethodSignature *sig, s
 #endif
 
 #if DEBUG_INTERP
-static char*
-dump_stack (stackval *stack, stackval *sp)
-{
-       stackval *s = stack;
-       GString *str = g_string_new ("");
-       
-       if (sp == stack)
-               return g_string_free (str, FALSE);
-       
-       while (s < sp) {
-               g_string_append_printf (str, "[%p (%" PRId64 ")] ", s->data.l, (gint64)s->data.l);
-               ++s;
-       }
-       return g_string_free (str, FALSE);
-}
-
 static void
 dump_stackval (GString *str, stackval *s, MonoType *type)
 {
@@ -1785,7 +1768,7 @@ dump_retval (InterpFrame *inv)
        MonoType *ret = mono_method_signature_internal (inv->imethod->method)->ret;
 
        if (ret->type != MONO_TYPE_VOID)
-               dump_stackval (str, STACK_ADD_BYTES (inv->stack, inv->imethod->total_locals_size), ret);
+               dump_stackval (str, inv->stack, ret);
 
        return g_string_free (str, FALSE);
 }
@@ -1937,8 +1920,8 @@ interp_runtime_invoke (MonoMethod *method, void *obj, void **params, MonoObject
                 */
                return NULL;
        }
-       // The return value is at the bottom of the stack, after the locals
-       return STACK_ADD_BYTES (frame.stack, imethod->total_locals_size)->data.o;
+       // The return value is at the bottom of the stack
+       return frame.stack->data.o;
 }
 
 typedef struct {
@@ -2026,10 +2009,10 @@ interp_entry (InterpEntryData *data)
        // The return value is at the bottom of the stack, after the locals space
        type = rmethod->rtype;
        if (type->type != MONO_TYPE_VOID)
-               stackval_to_data (type, STACK_ADD_BYTES (frame.stack, rmethod->total_locals_size), data->res, FALSE);
+               stackval_to_data (type, frame.stack, data->res, FALSE);
 }
 
-static stackval *
+static void
 do_icall (MonoMethodSignature *sig, int op, stackval *sp, gpointer ptr, gboolean save_last_error)
 {
 #ifdef ENABLE_NETCORE
@@ -2047,91 +2030,79 @@ do_icall (MonoMethodSignature *sig, int op, stackval *sp, gpointer ptr, gboolean
        case MINT_ICALL_V_P: {
                typedef gpointer (*T)(void);
                T func = (T)ptr;
-               sp++;
-               sp [-1].data.p = func ();
+               sp [0].data.p = func ();
                break;
        }
        case MINT_ICALL_P_V: {
                typedef void (*T)(gpointer);
                T func = (T)ptr;
-               func (sp [-1].data.p);
-               sp --;
+               func (sp [0].data.p);
                break;
        }
        case MINT_ICALL_P_P: {
                typedef gpointer (*T)(gpointer);
                T func = (T)ptr;
-               sp [-1].data.p = func (sp [-1].data.p);
+               sp [0].data.p = func (sp [0].data.p);
                break;
        }
        case MINT_ICALL_PP_V: {
                typedef void (*T)(gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 2;
                func (sp [0].data.p, sp [1].data.p);
                break;
        }
        case MINT_ICALL_PP_P: {
                typedef gpointer (*T)(gpointer,gpointer);
                T func = (T)ptr;
-               --sp;
-               sp [-1].data.p = func (sp [-1].data.p, sp [0].data.p);
+               sp [0].data.p = func (sp [0].data.p, sp [1].data.p);
                break;
        }
        case MINT_ICALL_PPP_V: {
                typedef void (*T)(gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 3;
                func (sp [0].data.p, sp [1].data.p, sp [2].data.p);
                break;
        }
        case MINT_ICALL_PPP_P: {
                typedef gpointer (*T)(gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 2;
-               sp [-1].data.p = func (sp [-1].data.p, sp [0].data.p, sp [1].data.p);
+               sp [0].data.p = func (sp [0].data.p, sp [1].data.p, sp [2].data.p);
                break;
        }
        case MINT_ICALL_PPPP_V: {
                typedef void (*T)(gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 4;
                func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p);
                break;
        }
        case MINT_ICALL_PPPP_P: {
                typedef gpointer (*T)(gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 3;
-               sp [-1].data.p = func (sp [-1].data.p, sp [0].data.p, sp [1].data.p, sp [2].data.p);
+               sp [0].data.p = func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p);
                break;
        }
        case MINT_ICALL_PPPPP_V: {
                typedef void (*T)(gpointer,gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 5;
                func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p, sp [4].data.p);
                break;
        }
        case MINT_ICALL_PPPPP_P: {
                typedef gpointer (*T)(gpointer,gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 4;
-               sp [-1].data.p = func (sp [-1].data.p, sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p);
+               sp [0].data.p = func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p, sp [4].data.p);
                break;
        }
        case MINT_ICALL_PPPPPP_V: {
                typedef void (*T)(gpointer,gpointer,gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 6;
                func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p, sp [4].data.p, sp [5].data.p);
                break;
        }
        case MINT_ICALL_PPPPPP_P: {
                typedef gpointer (*T)(gpointer,gpointer,gpointer,gpointer,gpointer,gpointer);
                T func = (T)ptr;
-               sp -= 5;
-               sp [-1].data.p = func (sp [-1].data.p, sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p, sp [4].data.p);
+               sp [0].data.p = func (sp [0].data.p, sp [1].data.p, sp [2].data.p, sp [3].data.p, sp [4].data.p, sp [5].data.p);
                break;
        }
        default:
@@ -2143,9 +2114,7 @@ do_icall (MonoMethodSignature *sig, int op, stackval *sp, gpointer ptr, gboolean
 
        /* convert the native representation to the stackval representation */
        if (sig)
-               stackval_from_data (sig->ret, &sp [-1], (char*) &sp [-1].data.p, sig->pinvoke);
-
-       return sp;
+               stackval_from_data (sig->ret, &sp [0], (char*) &sp [0].data.p, sig->pinvoke);
 }
 
 /* MONO_NO_OPTIMIZATION is needed due to usage of INTERP_PUSH_LMF_WITH_CTX. */
@@ -2153,19 +2122,19 @@ do_icall (MonoMethodSignature *sig, int op, stackval *sp, gpointer ptr, gboolean
 #pragma optimize ("", off)
 #endif
 // Do not inline in case order of frame addresses matters, and maybe other reasons.
-static MONO_NO_OPTIMIZATION MONO_NEVER_INLINE stackval *
+static MONO_NO_OPTIMIZATION MONO_NEVER_INLINE gpointer
 do_icall_wrapper (InterpFrame *frame, MonoMethodSignature *sig, int op, stackval *sp, gpointer ptr, gboolean save_last_error)
 {
        MonoLMFExt ext;
        INTERP_PUSH_LMF_WITH_CTX (frame, ext, exit_icall);
 
-       sp = do_icall (sig, op, sp, ptr, save_last_error);
+       do_icall (sig, op, sp, ptr, save_last_error);
 
        interp_pop_lmf (&ext);
 
        goto exit_icall; // prevent unused label warning in some configurations
 exit_icall:
-       return sp;
+       return NULL;
 }
 #ifdef _MSC_VER
 #pragma optimize ("", on)
@@ -3031,19 +3000,11 @@ static long opcode_counts[MINT_LASTOP];
 #if DEBUG_INTERP
 #define DUMP_INSTR() \
        if (tracing > 1) { \
-               char *ins; \
-               if (sp > frame->stack) { \
-                       ins = dump_stack (frame->stack, sp); \
-               } else { \
-                       ins = g_strdup (""); \
-               } \
-               sp->data.l = 0; \
                output_indent (); \
                char *mn = mono_method_full_name (frame->imethod->method, FALSE); \
                char *disasm = mono_interp_dis_mintop ((gint32)(ip - frame->imethod->code), TRUE, ip + 1, *ip); \
-               g_print ("(%p) %s -> %s\t%s\n", mono_thread_internal_current (), mn, disasm, ins); \
+               g_print ("(%p) %s -> %s\n", mono_thread_internal_current (), mn, disasm); \
                g_free (mn); \
-               g_free (ins); \
                g_free (disasm); \
        }
 #else
@@ -3072,12 +3033,12 @@ mono_interp_load_remote_field (
        InterpMethod* imethod,
        MonoObject* o,
        const guint16* ip,
-       stackval* sp)
+       gpointer result)
 {
        g_assert (o); // Caller checks and throws exception properly.
 
        void* addr;
-       MonoClassField* const field = (MonoClassField*)imethod->data_items[ip [1]];
+       MonoClassField *field = (MonoClassField*)imethod->data_items [ip [3]];
 
 #ifndef DISABLE_REMOTING
        gpointer tmp;
@@ -3089,20 +3050,20 @@ mono_interp_load_remote_field (
        } else
 #endif
                addr = (char*)o + field->offset;
-       stackval_from_data (field->type, &sp [-1], addr, FALSE);
+       stackval_from_data (field->type, (stackval*)result, addr, FALSE);
 }
 
-static stackval*
+static void
 mono_interp_load_remote_field_vt (
        InterpMethod* imethod,
        MonoObject* o,
        const guint16* ip,
-       stackval* sp)
+       gpointer result)
 {
        g_assert (o); // Caller checks and throws exception properly.
 
        void* addr;
-       MonoClassField* const field = (MonoClassField*)imethod->data_items[ip [1]];
+       MonoClassField *field = (MonoClassField*)imethod->data_items [ip [3]];
        MonoClass* klass = mono_class_from_mono_type_internal (field->type);
        int const i32 = mono_class_value_size (klass, NULL);
 
@@ -3116,9 +3077,7 @@ mono_interp_load_remote_field_vt (
        } else
 #endif
                addr = (char*)o + field->offset;
-       sp--;
-       memcpy ((char*)sp, addr, i32);
-       return STACK_ADD_BYTES (sp, i32);
+       memcpy (result, addr, i32);
 }
 
 static gboolean
@@ -3176,14 +3135,14 @@ mono_interp_leave (InterpFrame* parent_frame)
        return (MonoException*)tmp_sp.data.p;
 }
 
-static void
-mono_interp_enum_hasflag (stackval* sp, MonoClass* klass)
+static gint32
+mono_interp_enum_hasflag (stackval *sp1, stackval *sp2, MonoClass* klass)
 {
        guint64 a_val = 0, b_val = 0;
 
-       stackval_to_data (m_class_get_byval_arg (klass), --sp, &b_val, FALSE);
-       stackval_to_data (m_class_get_byval_arg (klass), --sp, &a_val, FALSE);
-       sp->data.i = (a_val & b_val) == b_val;
+       stackval_to_data (m_class_get_byval_arg (klass), sp1, &a_val, FALSE);
+       stackval_to_data (m_class_get_byval_arg (klass), sp2, &b_val, FALSE);
+       return (a_val & b_val) == b_val;
 }
 
 // varargs in wasm consumes extra linear stack per call-site.
@@ -3240,13 +3199,11 @@ method_entry (ThreadContext *context, InterpFrame *frame,
 /* Save the state of the interpeter main loop into FRAME */
 #define SAVE_INTERP_STATE(frame) do { \
        frame->state.ip = ip;  \
-       frame->state.sp = sp; \
        } while (0)
 
 /* Load and clear state from FRAME */
 #define LOAD_INTERP_STATE(frame) do { \
        ip = frame->state.ip; \
-       sp = frame->state.sp; \
        locals = (unsigned char *)frame->stack; \
        frame->state.ip = NULL; \
        } while (0)
@@ -3255,13 +3212,14 @@ method_entry (ThreadContext *context, InterpFrame *frame,
 #define INIT_INTERP_STATE(frame, _clause_args) do {     \
        ip = _clause_args ? ((FrameClauseArgs *)_clause_args)->start_with_ip : (frame)->imethod->code; \
        locals = (unsigned char *)(frame)->stack; \
-       sp = (stackval*)(locals + (frame)->imethod->total_locals_size); \
        } while (0)
 
 #if PROFILE_INTERP
 static long total_executed_opcodes;
 #endif
 
+#define LOCAL_VAR(offset,type) (*(type*)(locals + (offset)))
+
 /*
  * If CLAUSE_ARGS is non-null, start executing from it.
  * The ERROR argument is used to avoid declaring an error object for every interp frame, its not used
@@ -3277,12 +3235,11 @@ interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs
 
        /* Interpreter main loop state (InterpState) */
        const guint16 *ip = NULL;
-       stackval *sp;
        unsigned char *locals = NULL;
+       int call_args_offset;
 
 #if DEBUG_INTERP
        int tracing = global_tracing;
-       unsigned char *vtalloc;
 #endif
 #if USE_COMPUTED_GOTO
        static void * const in_labels[] = {
@@ -3328,8 +3285,8 @@ interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClauseArgs
        INIT_INTERP_STATE (frame, clause_args);
 
        if (clause_args && clause_args->filter_exception) {
-               sp->data.p = clause_args->filter_exception;
-               sp++;
+               // Write the exception onto the first slot of the execution stack
+               LOCAL_VAR (frame->imethod->total_locals_size, MonoException*) = clause_args->filter_exception;
        }
 
 #ifdef ENABLE_EXPERIMENT_TIERED
@@ -3351,13 +3308,6 @@ main_loop:
                total_executed_opcodes++;
 #endif
                MintOpcode opcode;
-#ifdef ENABLE_CHECKED_BUILD
-               guchar *sp_start = (guchar*)frame->stack + frame->imethod->total_locals_size;
-               guchar *sp_end = sp_start + frame->imethod->stack_size;
-               g_assert (locals == (guchar*)frame->stack);
-               g_assert ((guchar*)sp >= sp_start);
-               g_assert ((guchar*)sp <= sp_end);
-#endif
                DUMP_INSTR();
                MINT_IN_SWITCH (*ip) {
                MINT_IN_CASE(MINT_INITLOCALS)
@@ -3376,15 +3326,14 @@ main_loop:
                        ++ip;
                        mono_break ();
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDNULL) 
-                       sp->data.p = NULL;
-                       ++ip;
-                       ++sp;
+               MINT_IN_CASE(MINT_LDNULL)
+                       LOCAL_VAR (ip [1], gpointer) = NULL;
+                       ip += 2;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_INIT_ARGLIST) {
-                       const guint16 *call_ip = frame->parent->state.ip - 4;
+                       const guint16 *call_ip = frame->parent->state.ip - 5;
                        g_assert_checked (*call_ip == MINT_CALL_VARARG);
-                       int params_stack_size = call_ip [2];
+                       int params_stack_size = call_ip [4];
                        MonoMethodSignature *sig = (MonoMethodSignature*)frame->parent->imethod->data_items [call_ip [3]];
 
                        // we are being overly conservative with the size here, for simplicity
@@ -3393,13 +3342,13 @@ main_loop:
                        init_arglist (frame, sig, STACK_ADD_BYTES (frame->stack, ip [2]), (char*)arglist);
 
                        // save the arglist for future access with MINT_ARGLIST
-                       *(gpointer*)(locals + ip [1]) = arglist;
+                       LOCAL_VAR (ip [1], gpointer) = arglist;
 
                        ip += 3;
                        MINT_IN_BREAK;
                }
 
-#define LDC(n) do { sp->data.i = (n); ++ip; ++sp; } while (0)
+#define LDC(n) do { LOCAL_VAR (ip [1], gint32) = (n); ip += 2; } while (0)
                MINT_IN_CASE(MINT_LDC_I4_M1)
                        LDC(-1);
                        MINT_IN_BREAK;
@@ -3431,73 +3380,31 @@ main_loop:
                        LDC(8);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_I4_S) 
-                       sp->data.i = (short)ip [1];
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gint32) = (short)ip [2];
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_I4)
-                       ++ip;
-                       sp->data.i = READ32 (ip);
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gint32) = READ32 (ip + 2);
+                       ip += 4;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_I8)
-                       ++ip;
-                       sp->data.l = READ64 (ip);
-                       ip += 4;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gint64) = READ64 (ip + 2);
+                       ip += 6;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_I8_S)
-                       sp->data.l = (short)ip [1];
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gint64) = (short)ip [2];
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_R4) {
-                       guint32 val;
-                       ++ip;
-                       val = READ32(ip);
-                       sp->data.f_r4 = * (float *)&val;
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gint32) = READ32(ip + 2); /* not union usage */
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDC_R8) 
-                       sp->data.l = READ64 (ip + 1); /* note union usage */
-                       ip += 5;
-                       ++sp;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_DUP) 
-                       sp [0] = sp[-1];
-                       ++sp;
-                       ++ip; 
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_DUP_VT) {
-                       int i32 = READ32 (ip + 1);
-                       memcpy (sp, STACK_SUB_BYTES (sp, i32), i32);
-                       sp = STACK_ADD_BYTES (sp, i32);
-                       ip += 3;
-                       MINT_IN_BREAK;
-               }
-               MINT_IN_CASE(MINT_POP) {
-                       sp--;
-                       ip++;
-                       MINT_IN_BREAK;
-               }
-               MINT_IN_CASE(MINT_POP_VT) {
-                       int i32 = READ32 (ip + 1);
-                       i32 = ALIGN_TO (i32, MINT_STACK_SLOT_SIZE);
-                       sp = STACK_SUB_BYTES (sp, i32);
-                       ip += 3;
+                       LOCAL_VAR (ip [1], gint64) = READ64 (ip + 2); /* note union usage */
+                       ip += 6;
                        MINT_IN_BREAK;
-               }
-               MINT_IN_CASE(MINT_POP1) {
-                       sp [-2] = sp [-1];
-                       sp--;
-                       ip++;
-                       MINT_IN_BREAK;
-               }
                MINT_IN_CASE(MINT_JMP) {
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
                        InterpMethod *new_method = (InterpMethod*)frame->imethod->data_items [ip [1]];
 
                        if (frame->imethod->prof_flags & MONO_PROFILER_CALL_INSTRUMENTATION_TAIL_CALL)
@@ -3518,16 +3425,15 @@ main_loop:
                         */
                        context->stack_pointer = (guchar*)frame->stack + new_method->alloca_size;
                        frame->imethod = new_method;
-                       sp = (stackval*)(locals + frame->imethod->total_locals_size);
                        ip = frame->imethod->code;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CALL_DELEGATE) {
                        // FIXME We don't need to encode the whole signature, just param_count
-                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [1]];
+                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [3]];
                        int param_count = csignature->param_count;
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
-                       MonoDelegate *del = (MonoDelegate*) sp [0].data.o;
+                       call_args_offset = ip [1];
+                       MonoDelegate *del = LOCAL_VAR (call_args_offset, MonoDelegate*);
                        gboolean is_multicast = del->method == NULL;
                        InterpMethod *del_imethod = (InterpMethod*)del->interp_invoke_impl;
 
@@ -3556,7 +3462,7 @@ main_loop:
                                        } else if (del_imethod->method->flags & METHOD_ATTRIBUTE_VIRTUAL && !del->target) {
                                                // 'this' is passed dynamically, we need to recompute the target method
                                                // with each call
-                                               del_imethod = get_virtual_method (del_imethod, sp [1].data.o->vtable);
+                                               del_imethod = get_virtual_method (del_imethod, LOCAL_VAR (call_args_offset + MINT_STACK_SLOT_SIZE, MonoObject*)->vtable);
                                        } else {
                                                del->interp_invoke_impl = del_imethod;
                                        }
@@ -3568,106 +3474,94 @@ main_loop:
                                        // Target method is static but the delegate has a target object. We handle
                                        // this separately from the case below, because, for these calls, the instance
                                        // is allowed to be null.
-                                       sp [0].data.o = del->target;
+                                       LOCAL_VAR (ip [1], MonoObject*) = del->target;
                                } else if (del->target) {
                                        MonoObject *this_arg = del->target;
 
                                        // replace the MonoDelegate* on the stack with 'this' pointer
                                        if (m_class_is_valuetype (this_arg->vtable->klass)) {
                                                gpointer unboxed = mono_object_unbox_internal (this_arg);
-                                               sp [0].data.p = unboxed;
+                                               LOCAL_VAR (ip [1], gpointer) = unboxed;
                                        } else {
-                                               sp [0].data.o = this_arg;
+                                               LOCAL_VAR (ip [1], MonoObject*) = this_arg;
                                        }
                                } else {
                                        // skip the delegate pointer for static calls
                                        // FIXME we could avoid memmove
-                                       memmove (sp, sp + 1, ip [2]);
+                                       memmove (locals + call_args_offset, locals + call_args_offset + MINT_STACK_SLOT_SIZE, ip [2]);
                                }
                        }
-                       ip += 3;
+                       ip += 4;
 
                        goto call;
                }
                MINT_IN_CASE(MINT_CALLI) {
                        MonoMethodSignature *csignature;
 
-                       csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [1]];
-                       --sp;
+                       csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [3]];
 
-                       cmethod = (InterpMethod*)sp->data.p;
+                       cmethod = LOCAL_VAR (ip [2], InterpMethod*);
                        if (cmethod->method->flags & METHOD_ATTRIBUTE_PINVOKE_IMPL) {
                                cmethod = mono_interp_get_imethod (frame->imethod->domain, mono_marshal_get_native_wrapper (cmethod->method, FALSE, FALSE), error);
                                mono_interp_error_cleanup (error); /* FIXME: don't swallow the error */
                        }
 
-                       /* decrement by the actual number of args */
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
+                       call_args_offset = ip [1];
 
                        if (csignature->hasthis) {
-                               MonoObject *this_arg = (MonoObject*)sp->data.p;
+                               MonoObject *this_arg = LOCAL_VAR (call_args_offset, MonoObject*); 
 
                                if (m_class_is_valuetype (this_arg->vtable->klass)) {
                                        gpointer unboxed = mono_object_unbox_internal (this_arg);
-                                       sp [0].data.p = unboxed;
+                                       LOCAL_VAR (call_args_offset, gpointer) = unboxed;
                                }
                        }
-                       ip += 3;
+                       ip += 4;
 
                        goto call;
                }
                MINT_IN_CASE(MINT_CALLI_NAT_FAST) {
-                       gpointer target_ip = sp [-1].data.p;
-                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [1]];
-                       int opcode = ip [2];
-                       gboolean save_last_error = ip [3];
+                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [2]];
+                       int opcode = ip [3];
+                       gboolean save_last_error = ip [4];
 
-                       sp--;
+                       stackval *args = (stackval*)(locals + ip [1]);
+                       gpointer target_ip = args [csignature->param_count].data.p;
                        /* for calls, have ip pointing at the start of next instruction */
-                       frame->state.ip = ip + 4;
+                       frame->state.ip = ip + 5;
 
-                       sp = do_icall_wrapper (frame, csignature, opcode, sp, target_ip, save_last_error);
+                       do_icall_wrapper (frame, csignature, opcode, args, target_ip, save_last_error);
                        EXCEPTION_CHECKPOINT_GC_UNSAFE;
                        CHECK_RESUME_STATE (context);
-                       ip += 4;
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CALLI_NAT_DYNAMIC) {
-                       MonoMethodSignature* csignature;
-
-                       csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [1]];
-
-                       --sp;
-                       guchar* code = (guchar*)sp->data.p;
+                       MonoMethodSignature* csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [3]];
 
-                       /* decrement by the actual number of args */
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
+                       call_args_offset = ip [1];
+                       guchar* code = LOCAL_VAR (ip [2], guchar*);
 
                        cmethod = mono_interp_get_native_func_wrapper (frame->imethod, csignature, code);
 
-                       ip += 3;
+                       ip += 4;
                        goto call;
                }
                MINT_IN_CASE(MINT_CALLI_NAT) {
-                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [1]];
-                       InterpMethod *imethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [3]];
+                       InterpMethod *imethod = (InterpMethod*)frame->imethod->data_items [ip [4]];
 
-                       --sp;
-                       guchar* const code = (guchar*)sp->data.p;
-
-                       sp = STACK_SUB_BYTES (sp, ip [3]);
+                       guchar *code = LOCAL_VAR (ip [2], guchar*);
 
                        gboolean save_last_error = ip [5];
                        gpointer *cache = (gpointer*)&frame->imethod->data_items [ip [6]];
                        /* for calls, have ip pointing at the start of next instruction */
-                       frame->state.ip = ip + 6;
-                       ves_pinvoke_method (imethod, csignature, (MonoFuncV)code, context, frame, sp, save_last_error, cache);
+                       frame->state.ip = ip + 7;
+                       ves_pinvoke_method (imethod, csignature, (MonoFuncV)code, context, frame, (stackval*)(locals + ip [1]), save_last_error, cache);
 
                        EXCEPTION_CHECKPOINT_GC_UNSAFE;
                        CHECK_RESUME_STATE (context);
 
-                       // Result was written directly at top of stack
-                       sp = STACK_ADD_BYTES (sp, ip [4]);
                        ip += 7;
                        MINT_IN_BREAK;
                }
@@ -3675,10 +3569,10 @@ main_loop:
                        MonoObject *this_arg;
                        int slot;
 
-                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
+                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       call_args_offset = ip [1];
 
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
-                       this_arg = (MonoObject*)sp->data.p;
+                       this_arg = LOCAL_VAR (call_args_offset, MonoObject*);
 
                        slot = (gint16)ip [3];
                        ip += 4;
@@ -3686,7 +3580,7 @@ main_loop:
                        if (m_class_is_valuetype (this_arg->vtable->klass) && m_class_is_valuetype (cmethod->method->klass)) {
                                /* unbox */
                                gpointer unboxed = mono_object_unbox_internal (this_arg);
-                               sp [0].data.p = unboxed;
+                               LOCAL_VAR (call_args_offset, gpointer) = unboxed;
                        }
 
                        InterpMethodCodeType code_type = cmethod->code_type;
@@ -3711,16 +3605,13 @@ main_loop:
                        } else if (code_type == IMETHOD_CODE_COMPILED) {
                                frame->state.ip = ip;
                                error_init_reuse (error);
-                               do_jit_call (sp, frame, cmethod, error);
+                               do_jit_call ((stackval*)(locals + call_args_offset), frame, cmethod, error);
                                if (!is_ok (error)) {
                                        MonoException *ex = mono_error_convert_to_exception (error);
                                        THROW_EX (ex, ip);
                                }
 
                                CHECK_RESUME_STATE (context);
-
-                               if (cmethod->rtype->type != MONO_TYPE_VOID)
-                                       sp = STACK_ADD_BYTES (sp, ((JitCallInfo*)cmethod->jit_call_info)->res_size);
                        }
 
                        MINT_IN_BREAK;
@@ -3728,25 +3619,24 @@ main_loop:
                MINT_IN_CASE(MINT_CALL_VARARG) {
                        // Same as MINT_CALL, except at ip [3] we have the index for the csignature,
                        // which is required by the called method to set up the arglist.
-                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
-                       ip += 4;
+                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       call_args_offset = ip [1];
+                       ip += 5;
                        goto call;
                }
 
                MINT_IN_CASE(MINT_CALLVIRT) {
                        // FIXME CALLVIRT opcodes are not used on netcore. We should kill them.
-                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
-
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
+                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       call_args_offset = ip [1];
 
-                       MonoObject *this_arg = (MonoObject*)sp->data.p;
+                       MonoObject *this_arg = LOCAL_VAR (call_args_offset, MonoObject*);
 
                        cmethod = get_virtual_method (cmethod, this_arg->vtable);
                        if (m_class_is_valuetype (this_arg->vtable->klass) && m_class_is_valuetype (cmethod->method->klass)) {
                                /* unbox */
                                gpointer unboxed = mono_object_unbox_internal (this_arg);
-                               sp [0].data.p = unboxed;
+                               LOCAL_VAR (call_args_offset, gpointer) = unboxed;
                        }
 
 #ifdef ENABLE_EXPERIMENT_TIERED
@@ -3757,8 +3647,8 @@ main_loop:
                        goto call;
                }
                MINT_IN_CASE(MINT_CALL) {
-                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
+                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       call_args_offset = ip [1];
 
 #ifdef ENABLE_EXPERIMENT_TIERED
                        ip += 5;
@@ -3781,7 +3671,7 @@ call:
                                        // Not free currently, but will be when allocation attempted.
                                        frame->next_free = child_frame;
                                }
-                               reinit_frame (child_frame, frame, cmethod, sp);
+                               reinit_frame (child_frame, frame, cmethod, locals + call_args_offset);
                                frame = child_frame;
                        }
                        if (method_entry (context, frame,
@@ -3794,7 +3684,7 @@ call:
                                EXCEPTION_CHECKPOINT;
                        }
 
-                       context->stack_pointer = (guchar*)sp + cmethod->alloca_size;
+                       context->stack_pointer = (guchar*)frame->stack + cmethod->alloca_size;
                        /* Make sure the stack pointer is bumped before we store any references on the stack */
                        mono_compiler_barrier ();
 
@@ -3803,34 +3693,29 @@ call:
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_JIT_CALL) {
-                       InterpMethod *rmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
+                       InterpMethod *rmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
                        error_init_reuse (error);
-                       sp = STACK_SUB_BYTES (sp, ip [2]);
                        /* for calls, have ip pointing at the start of next instruction */
                        frame->state.ip = ip + 3;
-                       do_jit_call (sp, frame, rmethod, error);
+                       do_jit_call ((stackval*)(locals + ip [1]), frame, rmethod, error);
                        if (!is_ok (error)) {
                                MonoException *ex = mono_error_convert_to_exception (error);
                                THROW_EX (ex, ip);
                        }
 
                        CHECK_RESUME_STATE (context);
-
-                       if (rmethod->rtype->type != MONO_TYPE_VOID)
-                               sp = STACK_ADD_BYTES (sp, ((JitCallInfo*)rmethod->jit_call_info)->res_size);
                        ip += 3;
 
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_JIT_CALL2) {
 #ifdef ENABLE_EXPERIMENT_TIERED
-                       InterpMethod *rmethod = (InterpMethod *) READ64 (ip + 1);
+                       InterpMethod *rmethod = (InterpMethod *) READ64 (ip + 2);
 
                        error_init_reuse (error);
 
-                       sp -= rmethod->param_count + rmethod->hasthis;
-                       frame->state.ip = ip + 5;
-                       do_jit_call (sp, frame, rmethod, error);
+                       frame->state.ip = ip + 6;
+                       do_jit_call ((stackval*)(locals + ip [1]), frame, rmethod, error);
                        if (!is_ok (error)) {
                                MonoException *ex = mono_error_convert_to_exception (error);
                                THROW_EX (ex, ip);
@@ -3838,9 +3723,7 @@ call:
 
                        CHECK_RESUME_STATE (context);
 
-                       if (rmethod->rtype->type != MONO_TYPE_VOID)
-                               sp++;
-                       ip += 5;
+                       ip += 6;
 #else
                        g_error ("MINT_JIT_ICALL2 shouldn't be used");
 #endif
@@ -3848,73 +3731,38 @@ call:
                }
                MINT_IN_CASE(MINT_CALLRUN) {
 #ifndef ENABLE_NETCORE
-                       MonoMethod *target_method = (MonoMethod*) frame->imethod->data_items [ip [1]];
-                       MonoMethodSignature *sig = (MonoMethodSignature*) frame->imethod->data_items [ip [2]];
+                       MonoMethod *target_method = (MonoMethod*) frame->imethod->data_items [ip [2]];
+                       MonoMethodSignature *sig = (MonoMethodSignature*) frame->imethod->data_items [ip [3]];
 
-                       stackval *retval = sp;
-
-                       sp -= sig->param_count;
-                       if (sig->hasthis)
-                               sp--;
-
-                       MonoException *ex = ves_imethod (frame, target_method, sig, sp, retval);
+                       MonoException *ex = ves_imethod (frame, target_method, sig, (stackval*)(locals + ip [1]));
                        if (ex)
                                THROW_EX (ex, ip);
 
-                       if (sig->ret->type != MONO_TYPE_VOID) {
-                               *sp = *retval;
-                               sp++;
-                       }
-                       ip += 3;
+                       ip += 4;
 #else
                        g_assert_not_reached ();
 #endif
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_RET)
-                       --sp;
-                       if (frame->parent) {
-                               frame->parent->state.sp [0] = *sp;
-                               frame->parent->state.sp++;
-                       }
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
+                       frame->stack [0] = LOCAL_VAR (ip [1], stackval);
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VOID)
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VT) {
-                       int const i32 = READ32 (ip + 1);
-
-                       sp = STACK_SUB_BYTES (sp, i32);
-                       if (frame->parent) {
-                               memmove (frame->parent->state.sp, sp, i32);
-                               frame->parent->state.sp = STACK_ADD_BYTES (frame->parent->state.sp, i32);
-                       }
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
+                       memmove (frame->stack, locals + ip [1], ip [2]);
                        goto exit_frame;
                }
                MINT_IN_CASE(MINT_RET_LOCALLOC)
-                       --sp;
-                       if (frame->parent) {
-                               frame->parent->state.sp [0] = *sp;
-                               frame->parent->state.sp++;
-                       }
+                       frame->stack [0] = LOCAL_VAR (ip [1], stackval);
                        frame_data_allocator_pop (&context->data_stack, frame);
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VOID_LOCALLOC)
                        frame_data_allocator_pop (&context->data_stack, frame);
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VT_LOCALLOC) {
-                       int const i32 = READ32 (ip + 1);
-                       sp = STACK_SUB_BYTES (sp, i32);
-                       if (frame->parent) {
-                               memmove (frame->parent->state.sp, sp, i32);
-                               frame->parent->state.sp = STACK_ADD_BYTES (frame->parent->state.sp, i32);
-                       }
+                       memmove (frame->stack, locals + ip [1], ip [2]);
                        frame_data_allocator_pop (&context->data_stack, frame);
-                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size));
                        goto exit_frame;
                }
 
@@ -3940,752 +3788,868 @@ call:
                        MINT_IN_BREAK;
                }
 
-#define ZEROP_S(datamem, op) \
-       --sp; \
-       if (sp->data.datamem op 0) { \
-               gint16 br_offset = (gint16) ip [1]; \
+#define ZEROP_S(datatype, op) \
+       if (LOCAL_VAR (ip [1], datatype) op 0) { \
+               gint16 br_offset = (gint16) ip [2]; \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 2;
+               ip += 3;
 
-#define ZEROP(datamem, op) \
-       --sp; \
-       if (sp->data.datamem op 0) { \
-               gint32 br_offset = (gint32)READ32(ip + 1); \
+#define ZEROP(datatype, op) \
+       if (LOCAL_VAR (ip [1], datatype) op 0) { \
+               gint32 br_offset = (gint32)READ32(ip + 2); \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 3;
+               ip += 4;
 
                MINT_IN_CASE(MINT_BRFALSE_I4_S)
-                       ZEROP_S(i, ==);
+                       ZEROP_S(gint32, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_I8_S)
-                       ZEROP_S(l, ==);
+                       ZEROP_S(gint64, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_R4_S)
-                       ZEROP_S(f_r4, ==);
+                       ZEROP_S(float, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_R8_S)
-                       ZEROP_S(f, ==);
+                       ZEROP_S(double, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_I4)
-                       ZEROP(i, ==);
+                       ZEROP(gint32, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_I8)
-                       ZEROP(l, ==);
+                       ZEROP(gint64, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_R4)
-                       ZEROP_S(f_r4, ==);
+                       ZEROP_S(float, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRFALSE_R8)
-                       ZEROP_S(f, ==);
+                       ZEROP_S(double, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_I4_S)
-                       ZEROP_S(i, !=);
+                       ZEROP_S(gint32, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_I8_S)
-                       ZEROP_S(l, !=);
+                       ZEROP_S(gint64, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_R4_S)
-                       ZEROP_S(f_r4, !=);
+                       ZEROP_S(float, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_R8_S)
-                       ZEROP_S(f, !=);
+                       ZEROP_S(double, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_I4)
-                       ZEROP(i, !=);
+                       ZEROP(gint32, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_I8)
-                       ZEROP(l, !=);
+                       ZEROP(gint64, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_R4)
-                       ZEROP(f_r4, !=);
+                       ZEROP(float, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BRTRUE_R8)
-                       ZEROP(f, !=);
-                       MINT_IN_BREAK;
+                       ZEROP(double, !=);
+               MINT_IN_BREAK;
 #define CONDBR_S(cond) \
-       sp -= 2; \
        if (cond) { \
-               gint16 br_offset = (gint16) ip [1]; \
+               gint16 br_offset = (gint16) ip [3]; \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 2;
-#define BRELOP_S(datamem, op) \
-       CONDBR_S(sp[0].data.datamem op sp[1].data.datamem)
+               ip += 4;
+#define BRELOP_S(datatype, op) \
+       CONDBR_S(LOCAL_VAR (ip [1], datatype) op LOCAL_VAR (ip [2], datatype))
 
 #define CONDBR(cond) \
-       sp -= 2; \
        if (cond) { \
-               gint32 br_offset = (gint32) READ32 (ip + 1); \
+               gint32 br_offset = (gint32) READ32 (ip + 3); \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 3;
+               ip += 5;
 
-#define BRELOP(datamem, op) \
-       CONDBR(sp[0].data.datamem op sp[1].data.datamem)
+#define BRELOP(datatype, op) \
+       CONDBR(LOCAL_VAR (ip [1], datatype) op LOCAL_VAR (ip [2], datatype))
 
                MINT_IN_CASE(MINT_BEQ_I4_S)
-                       BRELOP_S(i, ==)
+                       BRELOP_S(gint32, ==)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BEQ_I8_S)
-                       BRELOP_S(l, ==)
+                       BRELOP_S(gint64, ==)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BEQ_R4_S)
-                       CONDBR_S(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 == sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BEQ_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(!isunordered (f1, f2) && f1 == f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BEQ_R8_S)
-                       CONDBR_S(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f == sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BEQ_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(!mono_isunordered (d1, d2) && d1 == d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BEQ_I4)
-                       BRELOP(i, ==)
+                       BRELOP(gint32, ==)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BEQ_I8)
-                       BRELOP(l, ==)
+                       BRELOP(gint64, ==)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BEQ_R4)
-                       CONDBR(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 == sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BEQ_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(!isunordered (f1, f2) && f1 == f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BEQ_R8)
-                       CONDBR(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f == sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BEQ_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(!mono_isunordered (d1, d2) && d1 == d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGE_I4_S)
-                       BRELOP_S(i, >=)
+                       BRELOP_S(gint32, >=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGE_I8_S)
-                       BRELOP_S(l, >=)
+                       BRELOP_S(gint64, >=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_R4_S)
-                       CONDBR_S(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 >= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGE_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(!isunordered (f1, f2) && f1 >= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_R8_S)
-                       CONDBR_S(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f >= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGE_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(!mono_isunordered (d1, d2) && d1 >= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGE_I4)
-                       BRELOP(i, >=)
+                       BRELOP(gint32, >=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGE_I8)
-                       BRELOP(l, >=)
+                       BRELOP(gint64, >=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_R4)
-                       CONDBR(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 >= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGE_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(!isunordered (f1, f2) && f1 >= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_R8)
-                       CONDBR(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f >= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGE_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(!mono_isunordered (d1, d2) && d1 >= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGT_I4_S)
-                       BRELOP_S(i, >)
+                       BRELOP_S(gint32, >)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGT_I8_S)
-                       BRELOP_S(l, >)
+                       BRELOP_S(gint64, >)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_R4_S)
-                       CONDBR_S(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 > sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGT_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(!isunordered (f1, f2) && f1 > f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_R8_S)
-                       CONDBR_S(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f > sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGT_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(!mono_isunordered (d1, d2) && d1 > d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGT_I4)
-                       BRELOP(i, >)
+                       BRELOP(gint32, >)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGT_I8)
-                       BRELOP(l, >)
+                       BRELOP(gint64, >)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_R4)
-                       CONDBR(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 > sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGT_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(!isunordered (f1, f2) && f1 > f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_R8)
-                       CONDBR(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f > sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGT_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(!mono_isunordered (d1, d2) && d1 > d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLT_I4_S)
-                       BRELOP_S(i, <)
+                       BRELOP_S(gint32, <)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLT_I8_S)
-                       BRELOP_S(l, <)
+                       BRELOP_S(gint64, <)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_R4_S)
-                       CONDBR_S(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 < sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLT_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(!isunordered (f1, f2) && f1 < f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_R8_S)
-                       CONDBR_S(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f < sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLT_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(!mono_isunordered (d1, d2) && d1 < d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLT_I4)
-                       BRELOP(i, <)
+                       BRELOP(gint32, <)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLT_I8)
-                       BRELOP(l, <)
+                       BRELOP(gint64, <)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_R4)
-                       CONDBR(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 < sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLT_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(!isunordered (f1, f2) && f1 < f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_R8)
-                       CONDBR(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f < sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLT_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(!mono_isunordered (d1, d2) && d1 < d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLE_I4_S)
-                       BRELOP_S(i, <=)
+                       BRELOP_S(gint32, <=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLE_I8_S)
-                       BRELOP_S(l, <=)
+                       BRELOP_S(gint64, <=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_R4_S)
-                       CONDBR_S(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 <= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLE_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(!isunordered (f1, f2) && f1 <= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_R8_S)
-                       CONDBR_S(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f <= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLE_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(!mono_isunordered (d1, d2) && d1 <= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLE_I4)
-                       BRELOP(i, <=)
+                       BRELOP(gint32, <=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLE_I8)
-                       BRELOP(l, <=)
+                       BRELOP(gint64, <=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_R4)
-                       CONDBR(!isunordered (sp [0].data.f_r4, sp [1].data.f_r4) && sp[0].data.f_r4 <= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLE_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(!isunordered (f1, f2) && f1 <= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_R8)
-                       CONDBR(!mono_isunordered (sp [0].data.f, sp [1].data.f) && sp[0].data.f <= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLE_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(!mono_isunordered (d1, d2) && d1 <= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BNE_UN_I4_S)
-                       BRELOP_S(i, !=)
+                       BRELOP_S(gint32, !=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BNE_UN_I8_S)
-                       BRELOP_S(l, !=)
+                       BRELOP_S(gint64, !=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BNE_UN_R4_S)
-                       CONDBR_S(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 != sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BNE_UN_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(isunordered (f1, f2) || f1 != f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BNE_UN_R8_S)
-                       CONDBR_S(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f != sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BNE_UN_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(mono_isunordered (d1, d2) || d1 != d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BNE_UN_I4)
-                       BRELOP(i, !=)
+                       BRELOP(gint32, !=)
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BNE_UN_I8)
-                       BRELOP(l, !=)
+                       BRELOP(gint64, !=)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BNE_UN_R4)
-                       CONDBR(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 != sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BNE_UN_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(isunordered (f1, f2) || f1 != f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BNE_UN_R8)
-                       CONDBR(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f != sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BNE_UN_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(mono_isunordered (d1, d2) || d1 != d2)
                        MINT_IN_BREAK;
+               }
 
-#define BRELOP_S_CAST(datamem, op, type) \
-       sp -= 2; \
-       if ((type) sp[0].data.datamem op (type) sp[1].data.datamem) { \
-               gint16 br_offset = (gint16) ip [1]; \
+#define BRELOP_S_CAST(datatype, op) \
+       if (LOCAL_VAR (ip [1], datatype) op LOCAL_VAR (ip [2], datatype)) { \
+               gint16 br_offset = (gint16) ip [3]; \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 2;
+               ip += 4;
 
-#define BRELOP_CAST(datamem, op, type) \
-       sp -= 2; \
-       if ((type) sp[0].data.datamem op (type) sp[1].data.datamem) { \
-               gint32 br_offset = (gint32) ip [1]; \
+/*
+ * Long-form compare-and-branch: ip [1] and ip [2] are the offsets of the two
+ * source vars, followed by a 32-bit branch offset at ip + 3 (two code words,
+ * hence the fall-through of 5). The offset must be read with READ32; ip [1]
+ * is a source var offset in this layout, not the branch target.
+ */
+#define BRELOP_CAST(datatype, op) \
+       if (LOCAL_VAR (ip [1], datatype) op LOCAL_VAR (ip [2], datatype)) { \
+               gint32 br_offset = (gint32) READ32 (ip + 3); \
                BACK_BRANCH_PROFILE (br_offset); \
                ip += br_offset; \
        } else \
-               ip += 3;
+               ip += 5;
 
                MINT_IN_CASE(MINT_BGE_UN_I4_S)
-                       BRELOP_S_CAST(i, >=, guint32);
+                       BRELOP_S_CAST(guint32, >=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGE_UN_I8_S)
-                       BRELOP_S_CAST(l, >=, guint64);
+                       BRELOP_S_CAST(guint64, >=);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_UN_R4_S)
-                       CONDBR_S(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 >= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGE_UN_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(isunordered (f1, f2) || f1 >= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_UN_R8_S)
-                       CONDBR_S(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f >= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGE_UN_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(mono_isunordered (d1, d2) || d1 >= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGE_UN_I4)
-                       BRELOP_CAST(i, >=, guint32);
+                       BRELOP_CAST(guint32, >=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGE_UN_I8)
-                       BRELOP_CAST(l, >=, guint64);
+                       BRELOP_CAST(guint64, >=);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_UN_R4)
-                       CONDBR(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 >= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGE_UN_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(isunordered (f1, f2) || f1 >= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGE_UN_R8)
-                       CONDBR(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f >= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGE_UN_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(mono_isunordered (d1, d2) || d1 >= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGT_UN_I4_S)
-                       BRELOP_S_CAST(i, >, guint32);
+                       BRELOP_S_CAST(guint32, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGT_UN_I8_S)
-                       BRELOP_S_CAST(l, >, guint64);
+                       BRELOP_S_CAST(guint64, >);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_UN_R4_S)
-                       CONDBR_S(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 > sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGT_UN_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(isunordered (f1, f2) || f1 > f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_UN_R8_S)
-                       CONDBR_S(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f > sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGT_UN_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(mono_isunordered (d1, d2) || d1 > d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BGT_UN_I4)
-                       BRELOP_CAST(i, >, guint32);
+                       BRELOP_CAST(guint32, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BGT_UN_I8)
-                       BRELOP_CAST(l, >, guint64);
+                       BRELOP_CAST(guint64, >);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_UN_R4)
-                       CONDBR(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 > sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BGT_UN_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(isunordered (f1, f2) || f1 > f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BGT_UN_R8)
-                       CONDBR(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f > sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BGT_UN_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(mono_isunordered (d1, d2) || d1 > d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLE_UN_I4_S)
-                       BRELOP_S_CAST(i, <=, guint32);
+                       BRELOP_S_CAST(guint32, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLE_UN_I8_S)
-                       BRELOP_S_CAST(l, <=, guint64);
+                       BRELOP_S_CAST(guint64, <=);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_UN_R4_S)
-                       CONDBR_S(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 <= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLE_UN_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(isunordered (f1, f2) || f1 <= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_UN_R8_S)
-                       CONDBR_S(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f <= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLE_UN_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(mono_isunordered (d1, d2) || d1 <= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLE_UN_I4)
-                       BRELOP_CAST(i, <=, guint32);
+                       BRELOP_CAST(guint32, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLE_UN_I8)
-                       BRELOP_CAST(l, <=, guint64);
+                       BRELOP_CAST(guint64, <=);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_UN_R4)
-                       CONDBR(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 <= sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLE_UN_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(isunordered (f1, f2) || f1 <= f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLE_UN_R8)
-                       CONDBR(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f <= sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLE_UN_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(mono_isunordered (d1, d2) || d1 <= d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLT_UN_I4_S)
-                       BRELOP_S_CAST(i, <, guint32);
+                       BRELOP_S_CAST(guint32, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLT_UN_I8_S)
-                       BRELOP_S_CAST(l, <, guint64);
+                       BRELOP_S_CAST(guint64, <);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_UN_R4_S)
-                       CONDBR_S(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 < sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLT_UN_R4_S) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR_S(isunordered (f1, f2) || f1 < f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_UN_R8_S)
-                       CONDBR_S(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f < sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLT_UN_R8_S) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR_S(mono_isunordered (d1, d2) || d1 < d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_BLT_UN_I4)
-                       BRELOP_CAST(i, <, guint32);
+                       BRELOP_CAST(guint32, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_BLT_UN_I8)
-                       BRELOP_CAST(l, <, guint64);
+                       BRELOP_CAST(guint64, <);
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_UN_R4)
-                       CONDBR(isunordered (sp [0].data.f_r4, sp [1].data.f_r4) || sp[0].data.f_r4 < sp[1].data.f_r4)
+               MINT_IN_CASE(MINT_BLT_UN_R4) {
+                       float f1 = LOCAL_VAR (ip [1], float);
+                       float f2 = LOCAL_VAR (ip [2], float);
+                       CONDBR(isunordered (f1, f2) || f1 < f2)
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_BLT_UN_R8)
-                       CONDBR(mono_isunordered (sp [0].data.f, sp [1].data.f) || sp[0].data.f < sp[1].data.f)
+               }
+               MINT_IN_CASE(MINT_BLT_UN_R8) {
+                       double d1 = LOCAL_VAR (ip [1], double);
+                       double d2 = LOCAL_VAR (ip [2], double);
+                       CONDBR(mono_isunordered (d1, d2) || d1 < d2)
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_SWITCH) {
-                       guint32 n;
-                       const unsigned short *st;
-                       ++ip;
-                       n = READ32 (ip);
-                       ip += 2;
-                       st = ip + 2 * n;
-                       --sp;
-                       if ((guint32)sp->data.i < n) {
-                               gint offset;
-                               ip += 2 * (guint32)sp->data.i;
-                               offset = READ32 (ip);
-                               ip = ip + offset;
+                       guint32 val = LOCAL_VAR (ip [1], guint32);
+                       guint32 n = READ32 (ip + 2);
+                       ip += 4;
+                       if (val < n) {
+                               ip += 2 * val;
+                               int offset = READ32 (ip);
+                               ip += offset;
                        } else {
-                               ip = st;
+                               ip += 2 * n;
                        }
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_LDIND_I1_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.i = *(gint8*)sp[-1].data.p;
+               MINT_IN_CASE(MINT_LDIND_I1_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], int) = *(gint8*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDIND_U1_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.i = *(guint8*)sp[-1].data.p;
+               }
+               MINT_IN_CASE(MINT_LDIND_U1_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], int) = *(guint8*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDIND_I2_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.i = *(gint16*)sp[-1].data.p;
+               }
+               MINT_IN_CASE(MINT_LDIND_I2_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], int) = *(gint16*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDIND_U2_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.i = *(guint16*)sp[-1].data.p;
+               }
+               MINT_IN_CASE(MINT_LDIND_U2_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], int) = *(guint16*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_LDIND_I4_CHECK) /* Fall through */
-               MINT_IN_CASE(MINT_LDIND_U4_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.i = *(gint32*)sp[-1].data.p;
+               MINT_IN_CASE(MINT_LDIND_U4_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], int) = *(gint32*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDIND_I8_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
+               }
+               MINT_IN_CASE(MINT_LDIND_I8_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
 #ifdef NO_UNALIGNED_ACCESS
-                       if ((gsize)sp [-1].data.p % SIZEOF_VOID_P)
-                               memcpy (&sp [-1].data.l, sp [-1].data.p, sizeof (gint64));
+                       if ((gsize)ptr % SIZEOF_VOID_P)
+                               memcpy (locals + ip [1], ptr, sizeof (gint64));
                        else
 #endif
-                       sp[-1].data.l = *(gint64*)sp[-1].data.p;
+                       LOCAL_VAR (ip [1], gint64) = *(gint64*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_LDIND_I) {
-                       guint16 offset = ip [1];
-                       // This doesn't follow the current stack based design, but we plan to switch to explicit offsets.
-                       stackval *addr = (stackval*)(locals + frame->imethod->total_locals_size + offset);
-                       addr->data.p = *(gpointer*)addr->data.p;
-                       ip += 2;
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       LOCAL_VAR (ip [1], gpointer) = *(gpointer*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDIND_I8) {
-                       guint16 offset = ip [1];
-                       // This doesn't follow the current stack based design, but we plan to switch to explicit offsets.
-                       stackval *addr = (stackval*)(locals + frame->imethod->total_locals_size + offset);
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
 #ifdef NO_UNALIGNED_ACCESS
-                       if ((gsize)addr->data.p % SIZEOF_VOID_P)
-                               memcpy (&addr->data.l, addr->data.p, sizeof (gint64));
+                       if ((gsize)ptr % SIZEOF_VOID_P)
+                               memcpy (locals + ip [1], ptr, sizeof (gint64));
                        else
 #endif
-                       addr->data.l = *(gint64*)addr->data.p;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gint64) = *(gint64*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_LDIND_R4_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp[-1].data.f_r4 = *(gfloat*)sp[-1].data.p;
+               MINT_IN_CASE(MINT_LDIND_R4_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], float) = *(gfloat*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDIND_R8_CHECK)
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
+               }
+               MINT_IN_CASE(MINT_LDIND_R8_CHECK) {
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
 #ifdef NO_UNALIGNED_ACCESS
-                       if ((gsize)sp [-1].data.p % SIZEOF_VOID_P)
-                               memcpy (&sp [-1].data.f, sp [-1].data.p, sizeof (gdouble));
+                       if ((gsize)ptr % SIZEOF_VOID_P)
+                               memcpy (locals + ip [1], ptr, sizeof (gdouble));
                        else
 #endif
-                       sp[-1].data.f = *(gdouble*)sp[-1].data.p;
+                       LOCAL_VAR (ip [1], double) = *(gdouble*)ptr;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_LDIND_REF)
-                       ++ip;
-                       sp[-1].data.p = *(gpointer*)sp[-1].data.p;
+                       LOCAL_VAR (ip [1], gpointer) = *(gpointer*)LOCAL_VAR (ip [2], gpointer);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDIND_REF_CHECK) {
-                       NULL_CHECK (sp [-1].data.p);
-                       ++ip;
-                       sp [-1].data.p = *(gpointer*)sp [-1].data.p;
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       NULL_CHECK (ptr);
+                       LOCAL_VAR (ip [1], gpointer) = *(gpointer*)LOCAL_VAR (ip [2], gpointer);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_STIND_REF) 
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       mono_gc_wbarrier_generic_store_internal (sp->data.p, sp [1].data.o);
+               MINT_IN_CASE(MINT_STIND_REF) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       mono_gc_wbarrier_generic_store_internal (ptr, LOCAL_VAR (ip [2], MonoObject*));
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_I1)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       * (gint8 *) sp->data.p = (gint8)sp[1].data.i;
+               }
+               MINT_IN_CASE(MINT_STIND_I1) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       // Read the value as a full gint32 and narrow by value, as the removed
+                       // code did; reading the var as gint8 picks the wrong byte on big-endian.
+                       *(gint8*)ptr = (gint8) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_I2)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       * (gint16 *) sp->data.p = (gint16)sp[1].data.i;
+               }
+               MINT_IN_CASE(MINT_STIND_I2) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       // Read the value as a full gint32 and narrow by value, as the removed
+                       // code did; reading the var as gint16 picks the wrong half on big-endian.
+                       *(gint16*)ptr = (gint16) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_I4)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       * (gint32 *) sp->data.p = sp[1].data.i;
+               }
+               MINT_IN_CASE(MINT_STIND_I4) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       *(gint32*)ptr = LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_I)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       * (mono_i *) sp->data.p = (mono_i)sp[1].data.p;
+               }
+               MINT_IN_CASE(MINT_STIND_I) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       *(mono_i*)ptr = LOCAL_VAR (ip [2], mono_i);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_I8)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
+               }
+               MINT_IN_CASE(MINT_STIND_I8) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
 #ifdef NO_UNALIGNED_ACCESS
-                       if ((gsize)sp->data.p % SIZEOF_VOID_P)
-                               memcpy (sp->data.p, &sp [1].data.l, sizeof (gint64));
+                       if ((gsize)ptr % SIZEOF_VOID_P)
+                               memcpy (ptr, locals + ip [2], sizeof (gint64));
                        else
 #endif
-                       * (gint64 *) sp->data.p = sp[1].data.l;
+                       *(gint64*)ptr = LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_R4)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
-                       * (float *) sp->data.p = sp[1].data.f_r4;
+               }
+               MINT_IN_CASE(MINT_STIND_R4) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
+                       *(float*)ptr = LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STIND_R8)
-                       NULL_CHECK (sp [-2].data.p);
-                       ++ip;
-                       sp -= 2;
+               }
+               MINT_IN_CASE(MINT_STIND_R8) {
+                       gpointer ptr = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (ptr);
 #ifdef NO_UNALIGNED_ACCESS
-                       if ((gsize)sp->data.p % SIZEOF_VOID_P)
-                               memcpy (sp->data.p, &sp [1].data.f, sizeof (double));
+                       if ((gsize)ptr % SIZEOF_VOID_P)
+                               memcpy (ptr, locals + ip [2], sizeof (double));
                        else
 #endif
-                       * (double *) sp->data.p = sp[1].data.f;
+                       *(double*)ptr = LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_MONO_ATOMIC_STORE_I4)
-                       ++ip;
-                       sp -= 2;
-                       mono_atomic_store_i32 ((gint32 *) sp->data.p, sp [1].data.i);
+                       mono_atomic_store_i32 (LOCAL_VAR (ip [1], gint32*), LOCAL_VAR (ip [2], gint32));
+                       ip += 3;
                        MINT_IN_BREAK;
-#define BINOP(datamem, op) \
-       --sp; \
-       sp [-1].data.datamem op ## = sp [0].data.datamem; \
-       ++ip;
+#define BINOP(datatype, op) \
+       LOCAL_VAR (ip [1], datatype) = LOCAL_VAR (ip [2], datatype) op LOCAL_VAR (ip [3], datatype); \
+       ip += 4;
                MINT_IN_CASE(MINT_ADD_I4)
-                       BINOP(i, +);
+                       BINOP(gint32, +);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD_I8)
-                       BINOP(l, +);
+                       BINOP(gint64, +);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD_R4)
-                       BINOP(f_r4, +);
+                       BINOP(float, +);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD_R8)
-                       BINOP(f, +);
+                       BINOP(double, +);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD1_I4)
-                       ++sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + 1;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD1_I8)
-                       ++sp [-1].data.l;
-                       ++ip;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LOCADD1_I4)
-                       *(gint32*)(locals + ip [1]) += 1;
-                       ip += 2;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LOCADD1_I8)
-                       *(gint64*)(locals + ip [1]) += 1;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + 1;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB_I4)
-                       BINOP(i, -);
+                       BINOP(gint32, -);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB_I8)
-                       BINOP(l, -);
+                       BINOP(gint64, -);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB_R4)
-                       BINOP(f_r4, -);
+                       BINOP(float, -);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB_R8)
-                       BINOP(f, -);
+                       BINOP(double, -);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB1_I4)
-                       --sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) - 1;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB1_I8)
-                       --sp [-1].data.l;
-                       ++ip;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LOCSUB1_I4)
-                       *(gint32*)(locals + ip [1]) -= 1;
-                       ip += 2;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LOCSUB1_I8)
-                       *(gint64*)(locals + ip [1]) -= 1;
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) - 1;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MUL_I4)
-                       BINOP(i, *);
+                       BINOP(gint32, *);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MUL_I8)
-                       BINOP(l, *);
+                       BINOP(gint64, *);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MUL_R4)
-                       BINOP(f_r4, *);
+                       BINOP(float, *);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MUL_R8)
-                       BINOP(f, *);
+                       BINOP(double, *);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_DIV_I4) {
-                       gint32 l1 = sp [-1].data.i;
-                       gint32 l2 = sp [-2].data.i;
-                       if (l1 == 0)
+                       gint32 i1 = LOCAL_VAR (ip [2], gint32);
+                       gint32 i2 = LOCAL_VAR (ip [3], gint32);
+                       if (i2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       if (l1 == (-1) && l2 == G_MININT32)
+                       if (i2 == (-1) && i1 == G_MININT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(i, /);
+                       LOCAL_VAR (ip [1], gint32) = i1 / i2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_DIV_I8) {
-                       gint64 l1 = sp [-1].data.l;
-                       gint64 l2 = sp [-2].data.l;
-                       if (l1 == 0)
+                       gint64 l1 = LOCAL_VAR (ip [2], gint64);
+                       gint64 l2 = LOCAL_VAR (ip [3], gint64);
+                       if (l2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       if (l1 == (-1) && l2 == G_MININT64)
+                       if (l2 == (-1) && l1 == G_MININT64)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(l, /);
+                       LOCAL_VAR (ip [1], gint64) = l1 / l2;
+                       ip += 4;
                        MINT_IN_BREAK;
                        }
                MINT_IN_CASE(MINT_DIV_R4)
-                       BINOP(f_r4, /);
+                       BINOP(float, /);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_DIV_R8)
-                       BINOP(f, /);
+                       BINOP(double, /);
                        MINT_IN_BREAK;
-
-#define BINOP_CAST(datamem, op, type) \
-       --sp; \
-       sp [-1].data.datamem = (type)sp [-1].data.datamem op (type)sp [0].data.datamem; \
-       ++ip;
-               MINT_IN_CASE(MINT_DIV_UN_I4)
-                       if (sp [-1].data.i == 0)
+               MINT_IN_CASE(MINT_DIV_UN_I4) {
+                       guint32 i2 = LOCAL_VAR (ip [3], guint32);
+                       if (i2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       BINOP_CAST(i, /, guint32);
+                       LOCAL_VAR (ip [1], guint32) = LOCAL_VAR (ip [2], guint32) / i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_DIV_UN_I8)
-                       if (sp [-1].data.l == 0)
+               }
+               MINT_IN_CASE(MINT_DIV_UN_I8) {
+                       guint64 l2 = LOCAL_VAR (ip [3], guint64);
+                       if (l2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       BINOP_CAST(l, /, guint64);
+                       LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64) / l2;
+                       ip += 4;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_REM_I4) {
-                       int i1 = sp [-1].data.i;
-                       int i2 = sp [-2].data.i;
-                       if (i1 == 0)
+                       gint32 i1 = LOCAL_VAR (ip [2], gint32);
+                       gint32 i2 = LOCAL_VAR (ip [3], gint32);
+                       if (i2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       if (i1 == (-1) && i2 == G_MININT32)
+                       if (i2 == (-1) && i1 == G_MININT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(i, %);
+                       LOCAL_VAR (ip [1], gint32) = i1 % i2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_REM_I8) {
-                       gint64 l1 = sp [-1].data.l;
-                       gint64 l2 = sp [-2].data.l;
-                       if (l1 == 0)
+                       gint64 l1 = LOCAL_VAR (ip [2], gint64);
+                       gint64 l2 = LOCAL_VAR (ip [3], gint64);
+                       if (l2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       if (l1 == (-1) && l2 == G_MININT64)
+                       if (l2 == (-1) && l1 == G_MININT64)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(l, %);
+                       LOCAL_VAR (ip [1], gint64) = l1 % l2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_REM_R4)
-                       /* FIXME: what do we actually do here? */
-                       --sp;
-                       sp [-1].data.f_r4 = fmodf (sp [-1].data.f_r4, sp [0].data.f_r4);
-                       ++ip;
+                       LOCAL_VAR (ip [1], float) = fmodf (LOCAL_VAR (ip [2], float), LOCAL_VAR (ip [3], float));
+                       ip += 4;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_REM_R8)
-                       /* FIXME: what do we actually do here? */
-                       --sp;
-                       sp [-1].data.f = fmod (sp [-1].data.f, sp [0].data.f);
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = fmod (LOCAL_VAR (ip [2], double), LOCAL_VAR (ip [3], double));
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_REM_UN_I4)
-                       if (sp [-1].data.i == 0)
+               MINT_IN_CASE(MINT_REM_UN_I4) {
+                       guint32 i2 = LOCAL_VAR (ip [3], guint32);
+                       if (i2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       BINOP_CAST(i, %, guint32);
+                       LOCAL_VAR (ip [1], guint32) = LOCAL_VAR (ip [2], guint32) % i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_REM_UN_I8)
-                       if (sp [-1].data.l == 0)
+               }
+               MINT_IN_CASE(MINT_REM_UN_I8) {
+                       guint64 l2 = LOCAL_VAR (ip [3], guint64);
+                       if (l2 == 0)
                                THROW_EX (mono_get_exception_divide_by_zero (), ip);
-                       BINOP_CAST(l, %, guint64);
+                       LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64) % l2;
+                       ip += 4;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_AND_I4)
-                       BINOP(i, &);
+                       BINOP(gint32, &);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_AND_I8)
-                       BINOP(l, &);
+                       BINOP(gint64, &);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_OR_I4)
-                       BINOP(i, |);
+                       BINOP(gint32, |);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_OR_I8)
-                       BINOP(l, |);
+                       BINOP(gint64, |);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_XOR_I4)
-                       BINOP(i, ^);
+                       BINOP(gint32, ^);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_XOR_I8)
-                       BINOP(l, ^);
+                       BINOP(gint64, ^);
                        MINT_IN_BREAK;
 
-#define SHIFTOP(datamem, op) \
-       --sp; \
-       sp [-1].data.datamem op ## = sp [0].data.i; \
-       ++ip;
+#define SHIFTOP(datatype, op) \
+       LOCAL_VAR (ip [1], datatype) = LOCAL_VAR (ip [2], datatype) op LOCAL_VAR (ip [3], gint32); \
+       ip += 4;
 
                MINT_IN_CASE(MINT_SHL_I4)
-                       SHIFTOP(i, <<);
+                       SHIFTOP(gint32, <<);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SHL_I8)
-                       SHIFTOP(l, <<);
+                       SHIFTOP(gint64, <<);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SHR_I4)
-                       SHIFTOP(i, >>);
+                       SHIFTOP(gint32, >>);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SHR_I8)
-                       SHIFTOP(l, >>);
+                       SHIFTOP(gint64, >>);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SHR_UN_I4)
-                       --sp;
-                       sp [-1].data.i = (guint32)sp [-1].data.i >> sp [0].data.i;
-                       ++ip;
+                       SHIFTOP(guint32, >>);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SHR_UN_I8)
-                       --sp;
-                       sp [-1].data.l = (guint64)sp [-1].data.l >> sp [0].data.i;
-                       ++ip;
+                       SHIFTOP(guint64, >>);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NEG_I4)
-                       sp [-1].data.i = - sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = - LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NEG_I8)
-                       sp [-1].data.l = - sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = - LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NEG_R4)
-                       sp [-1].data.f_r4 = - sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], float) = - LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NEG_R8)
-                       sp [-1].data.f = - sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = - LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NOT_I4)
-                       sp [-1].data.i = ~ sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = ~ LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NOT_I8)
-                       sp [-1].data.l = ~ sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = ~ LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I1_I4)
-                       sp [-1].data.i = (gint8)sp [-1].data.i;
-                       ++ip;
+                       // FIXME read casted var directly and remove redundant conv opcodes
+                       LOCAL_VAR (ip [1], gint32) = (gint8)LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I1_I8)
-                       sp [-1].data.i = (gint8)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8)LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I1_R4)
-                       sp [-1].data.i = (gint8) (gint32) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) (gint32) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I1_R8)
                        /* without gint32 cast, C compiler is allowed to use undefined
@@ -4695,187 +4659,170 @@ call:
                         * > is discarded.  The behavior is undefined if the truncated
                         * > value cannot be represented in the destination type.
                         * */
-                       sp [-1].data.i = (gint8) (gint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) (gint32) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U1_I4)
-                       sp [-1].data.i = (guint8)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U1_I8)
-                       sp [-1].data.i = (guint8)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U1_R4)
-                       sp [-1].data.i = (guint8) (guint32) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) (guint32) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U1_R8)
-                       sp [-1].data.i = (guint8) (guint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) (guint32) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I2_I4)
-                       sp [-1].data.i = (gint16)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I2_I8)
-                       sp [-1].data.i = (gint16)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I2_R4)
-                       sp [-1].data.i = (gint16) (gint32) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) (gint32) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I2_R8)
-                       sp [-1].data.i = (gint16) (gint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) (gint32) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U2_I4)
-                       sp [-1].data.i = (guint16)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U2_I8)
-                       sp [-1].data.i = (guint16)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U2_R4)
-                       sp [-1].data.i = (guint16) (guint32) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) (guint32) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U2_R8)
-                       sp [-1].data.i = (guint16) (guint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) (guint32) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I4_R4)
-                       sp [-1].data.i = (gint32) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I4_R8)
-                       sp [-1].data.i = (gint32)sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U4_I8)
                MINT_IN_CASE(MINT_CONV_I4_I8)
-                       sp [-1].data.i = (gint32)sp [-1].data.l;
-                       ++ip;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_I4_I8_SP)
-                       sp [-2].data.i = (gint32)sp [-2].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U4_R4)
 #ifdef MONO_ARCH_EMULATE_FCONV_TO_U4
-                       sp [-1].data.i = mono_rconv_u4 (sp [-1].data.f_r4);
+                       LOCAL_VAR (ip [1], gint32) = mono_rconv_u4 (LOCAL_VAR (ip [2], float));
 #else
-                       sp [-1].data.i = (guint32) sp [-1].data.f_r4;
+                       LOCAL_VAR (ip [1], gint32) = (guint32) LOCAL_VAR (ip [2], float);
 #endif
-                       ++ip;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U4_R8)
 #ifdef MONO_ARCH_EMULATE_FCONV_TO_U4
-                       sp [-1].data.i = mono_fconv_u4_2 (sp [-1].data.f);
+                       LOCAL_VAR (ip [1], gint32) = mono_fconv_u4_2 (LOCAL_VAR (ip [2], double));
 #else
-                       sp [-1].data.i = (guint32) sp [-1].data.f;
+                       LOCAL_VAR (ip [1], gint32) = (guint32) LOCAL_VAR (ip [2], double);
 #endif
-                       ++ip;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I8_I4)
-                       sp [-1].data.l = sp [-1].data.i;
-                       ++ip;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_I8_I4_SP)
-                       sp [-2].data.l = sp [-2].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I8_U4)
-                       sp [-1].data.l = (guint32)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (guint32) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I8_R4)
-                       sp [-1].data.l = (gint64) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_I8_R8)
-                       sp [-1].data.l = (gint64)sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R4_I4)
-                       sp [-1].data.f_r4 = (float)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R4_I8)
-                       sp [-1].data.f_r4 = (float)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R4_R8)
-                       sp [-1].data.f_r4 = (float)sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], float) = (float) LOCAL_VAR (ip [2], double);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R8_I4)
-                       sp [-1].data.f = (double)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], gint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R8_I8)
-                       sp [-1].data.f = (double)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], gint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R8_R4)
-                       sp [-1].data.f = (double) sp [-1].data.f_r4;
-                       ++ip;
-                       MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_R8_R4_SP)
-                       sp [-2].data.f = (double) sp [-2].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = (double) LOCAL_VAR (ip [2], float);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U8_R4)
 #ifdef MONO_ARCH_EMULATE_FCONV_TO_U8
-                       sp [-1].data.l = mono_rconv_u8 (sp [-1].data.f_r4);
+                       LOCAL_VAR (ip [1], gint64) = mono_rconv_u8 (LOCAL_VAR (ip [2], float));
 #else
-                       sp [-1].data.l = (guint64) sp [-1].data.f_r4;
+                       LOCAL_VAR (ip [1], gint64) = (guint64) LOCAL_VAR (ip [2], float);
 #endif
-                       ++ip;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_U8_R8)
 #ifdef MONO_ARCH_EMULATE_FCONV_TO_U8
-                       sp [-1].data.l = mono_fconv_u8_2 (sp [-1].data.f);
+                       LOCAL_VAR (ip [1], gint64) = mono_fconv_u8_2 (LOCAL_VAR (ip [2], double));
 #else
-                       sp [-1].data.l = (guint64)sp [-1].data.f;
+                       LOCAL_VAR (ip [1], gint64) = (guint64) LOCAL_VAR (ip [2], double);
 #endif
-                       ++ip;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CPOBJ) {
-                       MonoClass* const c = (MonoClass*)frame->imethod->data_items[ip [1]];
+                       MonoClass* const c = (MonoClass*)frame->imethod->data_items[ip [3]];
                        g_assert (m_class_is_valuetype (c));
                        /* if this assertion fails, we need to add a write barrier */
                        g_assert (!MONO_TYPE_IS_REFERENCE (m_class_get_byval_arg (c)));
-                       stackval_from_data (m_class_get_byval_arg (c), (stackval*)sp [-2].data.p, sp [-1].data.p, FALSE);
-                       ip += 2;
-                       sp -= 2;
+                       stackval_from_data (m_class_get_byval_arg (c), (stackval*)LOCAL_VAR (ip [1], gpointer), LOCAL_VAR (ip [2], gpointer), FALSE);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CPOBJ_VT) {
-                       MonoClass* const c = (MonoClass*)frame->imethod->data_items[ip [1]];
-                       mono_value_copy_internal (sp [-2].data.vt, sp [-1].data.vt, c);
-                       ip += 2;
-                       sp -= 2;
+                       MonoClass* const c = (MonoClass*)frame->imethod->data_items[ip [3]];
+                       mono_value_copy_internal (LOCAL_VAR (ip [1], gpointer), LOCAL_VAR (ip [2], gpointer), c);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDOBJ_VT) {
-                       int size = READ32(ip + 1);
-                       sp--;
-                       memcpy (sp, sp [0].data.p, size);
-                       sp = STACK_ADD_BYTES (sp, size);
-                       ip += 3;
+                       guint16 size = ip [3]; // byte count read from one ip slot into a guint16 (previously READ32 (ip + 1))
+                       memcpy (locals + ip [1], LOCAL_VAR (ip [2], gpointer), size); // copy from the pointer held in var ip [2] into the var at offset ip [1]
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDSTR)
-                       sp->data.p = frame->imethod->data_items [ip [1]];
-                       ++sp;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = frame->imethod->data_items [ip [2]];
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDSTR_TOKEN) {
                        MonoString *s = NULL;
-                       guint32 strtoken = (guint32)(gsize)frame->imethod->data_items [ip [1]];
+                       guint32 strtoken = (guint32)(gsize)frame->imethod->data_items [ip [2]];
 
                        MonoMethod *method = frame->imethod->method;
                        if (method->wrapper_type == MONO_WRAPPER_DYNAMIC_METHOD) {
@@ -4885,53 +4832,48 @@ call:
                        } else {
                                g_assert_not_reached ();
                        }
-                       sp->data.p = s;
-                       ++sp;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = s;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_NEWOBJ_ARRAY) {
                        MonoClass *newobj_class;
-                       guint32 token = ip [1];
-                       guint16 param_count = ip [2];
+                       guint32 token = ip [2]; // index into imethod->data_items for the array class
+                       guint16 param_count = ip [3];
 
                        newobj_class = (MonoClass*) frame->imethod->data_items [token];
 
-                       sp -= param_count;
-                       sp->data.o = ves_array_create (frame->imethod->domain, newobj_class, param_count, sp, error);
+                       LOCAL_VAR (ip [1], MonoObject*) = ves_array_create (frame->imethod->domain, newobj_class, param_count, (stackval*)(locals + ip [1]), error); // the argument block passed in starts at the same offset (ip [1]) that receives the result
                        if (!is_ok (error))
                                THROW_EX (mono_error_convert_to_exception (error), ip);
-
-                       ++sp;
-                       ip += 3;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_NEWOBJ_STRING) {
-                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [1]];
+                       cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
+                       call_args_offset = ip [1];
+
+                       int param_size = ip [3]; // size in bytes of the params, used as the memmove length below
+                       if (param_size)
+                               memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); // shift the params up by one stack slot, freeing the first slot for `this`
 
-                       int param_size = ip [2];
-                       if (param_size) {
-                               sp = STACK_SUB_BYTES (sp, param_size);
-                               memmove (sp + 1, sp, param_size);
-                       }
                        // `this` is implicit null. The created string will be returned
                        // by the call, even though the call has void return (?!).
-                       sp->data.p = NULL;
-                       ip += 3;
+                       LOCAL_VAR (call_args_offset, gpointer) = NULL;
+                       ip += 4;
                        goto call;
                }
                MINT_IN_CASE(MINT_NEWOBJ_FAST) {
                        MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [3]];
                        INIT_VTABLE (vtable);
-                       guint16 imethod_index = ip [1];
-                       guint16 param_size = ip [2];
+                       guint16 imethod_index = ip [2];
+                       guint16 param_size = ip [4];
+                       call_args_offset = ip [1];
                        const gboolean is_inlined = imethod_index == INLINED_METHOD_FLAG;
 
                        // Make room for two copies of o -- this parameter and return value.
-                       if (param_size) {
-                               sp = STACK_SUB_BYTES (sp, param_size);
-                               memmove (sp + 2, sp, param_size);
-                       }
+                       if (param_size)
+                               memmove (locals + call_args_offset + 2 * MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
 
                        MonoObject *o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (vtable->klass));
                        if (G_UNLIKELY (!o)) {
@@ -4939,55 +4881,52 @@ call:
                                THROW_EX (mono_error_convert_to_exception (error), ip);
                        }
 
-                       sp [0].data.o = o;
-                       sp++;
-                       sp [0].data.o = o;
-                       ip += 5;
+                       // This is return value
+                       LOCAL_VAR (call_args_offset, MonoObject*) = o;
+                       // Set `this` arg for ctor call
+                       call_args_offset += MINT_STACK_SLOT_SIZE;
+                       LOCAL_VAR (call_args_offset, MonoObject*) = o;
+                       ip += 6;
                        if (!is_inlined) {
                                cmethod = (InterpMethod*)frame->imethod->data_items [imethod_index];
                                goto call;
                        }
-                       sp = STACK_ADD_BYTES (sp, param_size + MINT_STACK_SLOT_SIZE);
-
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_NEWOBJ_VT_FAST) {
-                       guint16 imethod_index = ip [1];
-                       guint16 param_size = ip [2];
+                       guint16 imethod_index = ip [2];
                        guint16 ret_size = ip [3];
+                       guint16 param_size = ip [4];
                        gboolean is_inlined = imethod_index == INLINED_METHOD_FLAG;
+                       call_args_offset = ip [1];
+                       gpointer this_vt = locals + call_args_offset; // the valuetype storage begins at the original call_args_offset
 
-                       // Make room for extra parameter and result.
-                       if (param_size) {
-                               sp = STACK_SUB_BYTES (sp, param_size);
-                               memmove (STACK_ADD_BYTES (sp, ret_size + MINT_STACK_SLOT_SIZE), sp, param_size);
-                       }
-                       // Allocate return value on stack
-                       stackval *retvt = sp;
-                       memset (retvt, 0, ret_size);
-                       sp = STACK_ADD_BYTES (sp, ret_size);
-                       sp [0].data.p = retvt;
+                       if (param_size)
+                               memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size); // shift params past the valuetype storage (ret_size) and the slot that will hold its address
 
-                       ip += 5;
+                       // clear the valuetype
+                       memset (this_vt, 0, ret_size);
+                       call_args_offset += ret_size; // now points at the slot right after the valuetype storage
+                       // pass the address of the valuetype
+                       LOCAL_VAR (call_args_offset, gpointer) = this_vt;
+
+                       ip += 6;
                        if (!is_inlined) {
                                cmethod = (InterpMethod*)frame->imethod->data_items [imethod_index];
                                goto call;
                        }
-                       sp = STACK_ADD_BYTES (sp, param_size + MINT_STACK_SLOT_SIZE);
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_NEWOBJ) {
-                       guint32 const token = ip [1];
-                       guint16 param_size = ip [2];
+                       guint32 const token = ip [2];
+                       guint16 param_size = ip [3];
+                       call_args_offset = ip [1];
 
                        cmethod = (InterpMethod*)frame->imethod->data_items [token];
 
-                       // Make room for result and `this`
-                       if (param_size) {
-                               sp = STACK_SUB_BYTES (sp, param_size);
-                               memmove (sp + 2, sp, param_size);
-                       }
+                       if (param_size)
+                               memmove (locals + call_args_offset + 2 * MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
 
                        MonoClass * const newobj_class = cmethod->method->klass;
 
@@ -5007,8 +4946,9 @@ call:
                        }
                        error_init_reuse (error);
                        MonoObject* o = mono_object_new_checked (domain, newobj_class, error);
-                       sp [0].data.o = o; // return value
-                       sp [1].data.o = o; // first parameter
+                       LOCAL_VAR (call_args_offset, MonoObject*) = o; // return value
+                       call_args_offset += MINT_STACK_SLOT_SIZE;
+                       LOCAL_VAR (call_args_offset, MonoObject*) = o; // first parameter
 
                        mono_interp_error_cleanup (error); // FIXME: do not swallow the error
                        EXCEPTION_CHECKPOINT;
@@ -5020,112 +4960,92 @@ call:
                                mono_error_assert_ok (error);
                        }
 #endif
-                       ip += 3;
-                       sp++;
+                       ip += 4;
                        goto call;
                }
-               MINT_IN_CASE(MINT_NEWOBJ_MAGIC) {
-                       ip += 2;
-
-                       MINT_IN_BREAK;
-               }
                MINT_IN_CASE(MINT_INTRINS_SPAN_CTOR) {
-                       sp -= 2;
-                       gpointer ptr = sp [0].data.p;
-                       int len = sp [1].data.i;
+                       gpointer ptr = LOCAL_VAR (ip [2], gpointer);
+                       int len = LOCAL_VAR (ip [3], gint32);
                        if (len < 0)
                                THROW_EX (mono_get_exception_argument_out_of_range ("length"), ip);
-                       *(gpointer*)sp = ptr;
-                       *(gint32*)((gpointer*)sp + 1) = len;
-#if SIZEOF_VOID_P == 8
-                       sp = STACK_ADD_BYTES (sp, 12);
-#else
-                       sp = STACK_ADD_BYTES (sp, 8);
-#endif
-                       ip++;
+                       gpointer span = locals + ip [1];
+                       *(gpointer*)span = ptr;
+                       *(gint32*)((gpointer*)span + 1) = len;
+                       ip += 4;;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_BYREFERENCE_GET_VALUE) {
-                       sp [-1].data.p = *(gpointer*)sp [-1].data.p;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = *LOCAL_VAR (ip [2], gpointer*);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_UNSAFE_ADD_BYTE_OFFSET) {
-                       sp -= 2;
-                       sp [0].data.p = (guint8*)sp [0].data.p + sp [1].data.nati;
-                       sp ++;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = LOCAL_VAR (ip [2], guint8*) + LOCAL_VAR (ip [3], mono_u);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_CLEAR_WITH_REFERENCES) {
-                       sp -= 2;
-                       gpointer p = sp [0].data.p;
-                       size_t size = sp [1].data.nati * sizeof (gpointer);
+                       gpointer p = LOCAL_VAR (ip [1], gpointer);
+                       size_t size = LOCAL_VAR (ip [2], mono_u) * sizeof (gpointer);
                        mono_gc_bzero_aligned (p, size);
-                       ++ip;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_MARVIN_BLOCK) {
-                       sp -= 2;
-                       interp_intrins_marvin_block ((guint32*)sp [0].data.p, (guint32*)sp [1].data.p);
-                       ++ip;
+                       interp_intrins_marvin_block (LOCAL_VAR (ip [1], guint32*), LOCAL_VAR (ip [2], guint32*));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE) {
-                       sp [-1].data.i = interp_intrins_ascii_chars_to_uppercase ((guint32)sp [-1].data.i);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = interp_intrins_ascii_chars_to_uppercase (LOCAL_VAR (ip [2], guint32));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp[-1].data.p = (guint8*)o + MONO_STRUCT_OFFSET (MonoArray, vector);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = (guint8*)o + MONO_STRUCT_OFFSET (MonoArray, vector);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII) {
-                       sp--;
-                       sp [-1].data.i = interp_intrins_ordinal_ignore_case_ascii ((guint32)sp [-1].data.i, (guint32)sp [0].data.i);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = interp_intrins_ordinal_ignore_case_ascii (LOCAL_VAR (ip [2], guint32), LOCAL_VAR (ip [3], guint32));
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_64ORDINAL_IGNORE_CASE_ASCII) {
-                       sp--;
-                       sp [-1].data.i = interp_intrins_64ordinal_ignore_case_ascii ((guint64)sp [-1].data.l, (guint64)sp [0].data.l);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = interp_intrins_64ordinal_ignore_case_ascii (LOCAL_VAR (ip [2], guint64), LOCAL_VAR (ip [3], guint64));
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_U32_TO_DECSTR) {
-                       MonoArray **cache_addr = (MonoArray**)frame->imethod->data_items [ip [1]];
-                       MonoVTable *string_vtable = (MonoVTable*)frame->imethod->data_items [ip [2]];
-                       sp [-1].data.o = (MonoObject*)interp_intrins_u32_to_decstr ((guint32)sp [-1].data.i, *cache_addr, string_vtable);
-                       ip += 3;
+                       MonoArray **cache_addr = (MonoArray**)frame->imethod->data_items [ip [3]];
+                       MonoVTable *string_vtable = (MonoVTable*)frame->imethod->data_items [ip [4]];
+                       LOCAL_VAR (ip [1], MonoObject*) = (MonoObject*)interp_intrins_u32_to_decstr (LOCAL_VAR (ip [2], guint32), *cache_addr, string_vtable);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_WIDEN_ASCII_TO_UTF16) {
-                       sp -= 2;
-                       sp [-1].data.nati = interp_intrins_widen_ascii_to_utf16 ((guint8*)sp [-1].data.p, (mono_unichar2*)sp [0].data.p, sp [1].data.nati);
-                       ip++;
+                       LOCAL_VAR (ip [1], mono_u) = interp_intrins_widen_ascii_to_utf16 (LOCAL_VAR (ip [2], guint8*), LOCAL_VAR (ip [3], mono_unichar2*), LOCAL_VAR (ip [4], mono_u));
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_UNSAFE_BYTE_OFFSET) {
-                       sp -= 2;
-                       sp [0].data.nati = (guint8*)sp [1].data.p - (guint8*)sp [0].data.p;
-                       sp ++;
-                       ++ip;
+                       LOCAL_VAR (ip [1], mono_u) = LOCAL_VAR (ip [3], guint8*) - LOCAL_VAR (ip [2], guint8*);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE) {
-                       MonoObject *obj = sp [-1].data.o;
-                       sp [-1].data.i = (obj->vtable->flags & MONO_VT_FLAG_ARRAY_OR_STRING) != 0;
-                       ++ip;
+                       MonoObject *obj = LOCAL_VAR (ip [2], MonoObject*);
+                       LOCAL_VAR (ip [1], gint32) = (obj->vtable->flags & MONO_VT_FLAG_ARRAY_OR_STRING) != 0;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CASTCLASS_INTERFACE)
                MINT_IN_CASE(MINT_ISINST_INTERFACE) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        if (o) {
-                               MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [1]];
+                               MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
                                gboolean isinst;
                                if (MONO_VTABLE_IMPLEMENTS_INTERFACE (o->vtable, m_class_get_interface_id (c))) {
                                        isinst = TRUE;
@@ -5139,75 +5059,88 @@ call:
                                if (!isinst) {
                                        gboolean const isinst_instr = *ip == MINT_ISINST_INTERFACE;
                                        if (isinst_instr)
-                                               sp [-1].data.p = NULL;
+                                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                                        else
                                                THROW_EX (mono_get_exception_invalid_cast (), ip);
+                               } else {
+                                       LOCAL_VAR (ip [1], MonoObject*) = o;
                                }
+                       } else {
+                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                        }
-                       ip += 2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CASTCLASS_COMMON)
                MINT_IN_CASE(MINT_ISINST_COMMON) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        if (o) {
-                               MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [1]];
+                               MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
                                gboolean isinst = mono_class_has_parent_fast (o->vtable->klass, c);
 
                                if (!isinst) {
                                        gboolean const isinst_instr = *ip == MINT_ISINST_COMMON;
                                        if (isinst_instr)
-                                               sp [-1].data.p = NULL;
+                                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                                        else
                                                THROW_EX (mono_get_exception_invalid_cast (), ip);
+                               } else {
+                                       LOCAL_VAR (ip [1], MonoObject*) = o;
                                }
+                       } else {
+                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                        }
-                       ip += 2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CASTCLASS)
                MINT_IN_CASE(MINT_ISINST) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        if (o) {
-                               MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [1]];
+                               MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [3]];
                                if (!mono_interp_isinst (o, c)) { // FIXME: do not swallow the error
                                        gboolean const isinst_instr = *ip == MINT_ISINST;
                                        if (isinst_instr)
-                                               sp [-1].data.p = NULL;
+                                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                                        else
                                                THROW_EX (mono_get_exception_invalid_cast (), ip);
+                               } else {
+                                       LOCAL_VAR (ip [1], MonoObject*) = o;
                                }
+                       } else {
+                               LOCAL_VAR (ip [1], MonoObject*) = NULL;
                        }
-                       ip += 2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_R_UN_I4)
-                       sp [-1].data.f = (double)(guint32)sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = (double)LOCAL_VAR (ip [2], guint32);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CONV_R_UN_I8)
-                       sp [-1].data.f = (double)(guint64)sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = (double)LOCAL_VAR (ip [2], guint64);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_UNBOX) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       MonoClass* const c = (MonoClass*)frame->imethod->data_items[ip [1]];
+                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
 
                        if (!(m_class_get_rank (o->vtable->klass) == 0 && m_class_get_element_class (o->vtable->klass) == m_class_get_element_class (c)))
                                THROW_EX (mono_get_exception_invalid_cast (), ip);
 
-                       sp [-1].data.p = mono_object_unbox_internal (o);
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = mono_object_unbox_internal (o);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_THROW)
-                       --sp;
-                       if (!sp->data.p)
-                               sp->data.p = mono_get_exception_null_reference ();
+               MINT_IN_CASE(MINT_THROW) {
+                       MonoException *ex = LOCAL_VAR (ip [1], MonoException*);
+                       if (!ex)
+                               ex = mono_get_exception_null_reference ();
 
-                       THROW_EX ((MonoException *)sp->data.p, ip);
+                       THROW_EX (ex, ip);
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_CHECKPOINT)
                        /* Do synchronous checking of abort requests */
                        EXCEPTION_CHECKPOINT;
@@ -5221,554 +5154,462 @@ call:
                        ++ip;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDFLDA_UNSAFE) {
-                       sp[-1].data.p = (char*)sp [-1].data.o + ip [1];
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = (char*)LOCAL_VAR (ip [2], gpointer) + ip [3];
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDFLDA) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp[-1].data.p = (char *)o + ip [1];
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = (char *)o + ip [3];
+                       ip += 4;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_CKNULL_N) {
-                       /* Same as CKNULL, but further down the stack */
-                       int offset = ip [1];
-                       // This doesn't follow the current stack based design, but we plan to switch to explicit offsets.
-                       MonoObject *o = *(MonoObject**)(locals + frame->imethod->total_locals_size + offset);
+               MINT_IN_CASE(MINT_CKNULL) {
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       ip += 2;
+                       LOCAL_VAR (ip [1], MonoObject*) = o;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
 
-#define LDFLD_VT_UNALIGNED(datamem, fieldtype, unaligned) do { \
-       sp = STACK_SUB_BYTES (sp, ip [2]); \
+// FIXME squash to load directly field type, LDFLD_VT is just a LDLOC
+#define LDFLD_VT_UNALIGNED(datatype, fieldtype, unaligned) do { \
        if (unaligned) \
-               memcpy (&sp [0].data.datamem, (char *)sp + ip [1], sizeof (fieldtype)); \
+               memcpy (locals + ip [1], (char *)locals + ip [2] + ip [3], sizeof (fieldtype)); \
        else \
-               sp [0].data.datamem = * (fieldtype *)((char *)sp + ip [1]); \
-       sp++; \
-       ip += 3; \
+               LOCAL_VAR (ip [1], datatype) = LOCAL_VAR (ip [2] + ip [3], fieldtype); \
+       ip += 4; \
 } while (0)
 
-#define LDFLD_VT(datamem, fieldtype) LDFLD_VT_UNALIGNED(datamem, fieldtype, FALSE)
+#define LDFLD_VT(datatype, fieldtype) LDFLD_VT_UNALIGNED(datatype, fieldtype, FALSE)
 
-               MINT_IN_CASE(MINT_LDFLD_VT_I1) LDFLD_VT(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_U1) LDFLD_VT(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_I2) LDFLD_VT(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_U2) LDFLD_VT(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_I4) LDFLD_VT(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_I8) LDFLD_VT(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_R4) LDFLD_VT(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_R8) LDFLD_VT(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_O) LDFLD_VT(p, gpointer); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_I8_UNALIGNED) LDFLD_VT_UNALIGNED(l, gint64, TRUE); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_VT_R8_UNALIGNED) LDFLD_VT_UNALIGNED(f, double, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_I1) LDFLD_VT(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_U1) LDFLD_VT(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_I2) LDFLD_VT(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_U2) LDFLD_VT(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_I4) LDFLD_VT(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_I8) LDFLD_VT(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_R4) LDFLD_VT(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_R8) LDFLD_VT(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_O) LDFLD_VT(gpointer, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_I8_UNALIGNED) LDFLD_VT_UNALIGNED(gint64, gint64, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_VT_R8_UNALIGNED) LDFLD_VT_UNALIGNED(double, double, TRUE); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_LDFLD_VT_VT) {
-                       sp = STACK_SUB_BYTES (sp, ip [2]); \
-                       memmove (sp, (char *)sp + ip [1], ip [3]);
-                       sp = STACK_ADD_BYTES (sp, ip [3]);
-                       ip += 4;
+                       memmove (locals + ip [1], locals + ip [2] + ip [3], ip [4]);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
 
-#define LDFLD_UNALIGNED(datamem, fieldtype, unaligned) do { \
-       MonoObject* const o = sp [-1].data.o; \
+#define LDFLD_UNALIGNED(datatype, fieldtype, unaligned) do { \
+       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*); \
        NULL_CHECK (o); \
        if (unaligned) \
-               memcpy (&sp[-1].data.datamem, (char *)o + ip [1], sizeof (fieldtype)); \
+               memcpy (locals + ip [1], (char *)o + ip [3], sizeof (fieldtype)); \
        else \
-               sp[-1].data.datamem = * (fieldtype *)((char *)o + ip [1]) ; \
-       ip += 2; \
+               LOCAL_VAR (ip [1], datatype) = * (fieldtype *)((char *)o + ip [3]) ; \
+       ip += 4; \
 } while (0)
 
-#define LDFLD(datamem, fieldtype) LDFLD_UNALIGNED(datamem, fieldtype, FALSE)
+#define LDFLD(datatype, fieldtype) LDFLD_UNALIGNED(datatype, fieldtype, FALSE)
 
-               MINT_IN_CASE(MINT_LDFLD_I1) LDFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_U1) LDFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_I2) LDFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_U2) LDFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_I4) LDFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_I8) LDFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_R4) LDFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_R8) LDFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_O) LDFLD(p, gpointer); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_I8_UNALIGNED) LDFLD_UNALIGNED(l, gint64, TRUE); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDFLD_R8_UNALIGNED) LDFLD_UNALIGNED(f, double, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_I1) LDFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_U1) LDFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_I2) LDFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_U2) LDFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_I4) LDFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_I8) LDFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_R4) LDFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_R8) LDFLD(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_O) LDFLD(gpointer, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_I8_UNALIGNED) LDFLD_UNALIGNED(gint64, gint64, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDFLD_R8_UNALIGNED) LDFLD_UNALIGNED(double, double, TRUE); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_LDFLD_VT) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-
-                       int size = READ32(ip + 2);
-                       sp--;
-                       memcpy (sp, (char *)o + ip [1], size);
-                       sp = STACK_ADD_BYTES (sp, size);
-                       ip += 4;
+                       memcpy (locals + ip [1], (char *)o + ip [3], ip [4]);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_LDRMFLD) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       mono_interp_load_remote_field (frame->imethod, o, ip, sp);
-                       ip += 2;
+                       mono_interp_load_remote_field (frame->imethod, o, ip, locals + ip [1]);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDRMFLD_VT) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp = mono_interp_load_remote_field_vt (frame->imethod, o, ip, sp);
-                       ip += 2;
+                       mono_interp_load_remote_field_vt (frame->imethod, o, ip, locals + ip [1]);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
-#define LDLOCFLD(datamem, fieldtype) do { \
-       MonoObject *o = *(MonoObject**)(locals + ip [1]); \
-       NULL_CHECK (o); \
-       sp [0].data.datamem = * (fieldtype *)((char *)o + ip [2]) ; \
-       sp++; \
-       ip += 3; \
-} while (0)
-               MINT_IN_CASE(MINT_LDLOCFLD_I1) LDLOCFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_U1) LDLOCFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_I2) LDLOCFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_U2) LDLOCFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_I4) LDLOCFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_I8) LDLOCFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_R4) LDLOCFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_R8) LDLOCFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOCFLD_O) LDLOCFLD(p, gpointer); MINT_IN_BREAK;
-
-#define STFLD_UNALIGNED(datamem, fieldtype, unaligned) do { \
-       MonoObject* const o = sp [-2].data.o; \
+#define STFLD_UNALIGNED(datatype, fieldtype, unaligned) do { \
+       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*); \
        NULL_CHECK (o); \
-       sp -= 2; \
        if (unaligned) \
-               memcpy ((char *)o + ip [1], &sp[1].data.datamem, sizeof (fieldtype)); \
+               memcpy ((char *)o + ip [3], locals + ip [2], sizeof (fieldtype)); \
        else \
-               * (fieldtype *)((char *)o + ip [1]) = sp[1].data.datamem; \
-       ip += 2; \
+               * (fieldtype *)((char *)o + ip [3]) = LOCAL_VAR (ip [2], datatype); \
+       ip += 4; \
 } while (0)
 
-#define STFLD(datamem, fieldtype) STFLD_UNALIGNED(datamem, fieldtype, FALSE)
+#define STFLD(datatype, fieldtype) STFLD_UNALIGNED(datatype, fieldtype, FALSE)
 
-               MINT_IN_CASE(MINT_STFLD_I1) STFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_U1) STFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_I2) STFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_U2) STFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_I4) STFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_I8) STFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_R4) STFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_R8) STFLD(f, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_I1) STFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_U1) STFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_I2) STFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_U2) STFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_I4) STFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_I8) STFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_R4) STFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_R8) STFLD(double, double); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_STFLD_O) {
-                       MonoObject* const o = sp [-2].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*);
                        NULL_CHECK (o);
-                       sp -= 2;
-                       mono_gc_wbarrier_set_field_internal (o, (char *) o + ip [1], sp [1].data.o);
-                       ip += 2;
+                       mono_gc_wbarrier_set_field_internal (o, (char*)o + ip [3], LOCAL_VAR (ip [2], MonoObject*));
+                       ip += 4;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_STFLD_I8_UNALIGNED) STFLD_UNALIGNED(l, gint64, TRUE); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STFLD_R8_UNALIGNED) STFLD_UNALIGNED(f, double, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_I8_UNALIGNED) STFLD_UNALIGNED(gint64, gint64, TRUE); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STFLD_R8_UNALIGNED) STFLD_UNALIGNED(double, double, TRUE); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_STFLD_VT_NOREF) {
-                       guint16 offset = ip [1];
-                       guint16 vtsize = ip [2];
-
-                       sp = STACK_SUB_BYTES (sp, MINT_STACK_SLOT_SIZE + vtsize);
-                       MonoObject *o = sp [0].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*);
                        NULL_CHECK (o);
-
-                       memcpy ((char *) o + offset, sp + 1, vtsize);
-
-                       ip += 3;
+                       memcpy ((char*)o + ip [3], locals + ip [2], ip [4]);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_STFLD_VT) {
-                       MonoClass *klass = (MonoClass*)frame->imethod->data_items[ip [2]];
-                       int vtsize = mono_class_value_size (klass, NULL);
-
-                       sp = STACK_SUB_BYTES (sp, MINT_STACK_SLOT_SIZE + vtsize);
-                       MonoObject *o = sp [0].data.o;
+                       MonoClass *klass = (MonoClass*)frame->imethod->data_items [ip [4]];
+                       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*);
                        NULL_CHECK (o);
-
-                       guint16 offset = ip [1];
-                       mono_value_copy_internal ((char *) o + offset, sp + 1, klass);
-
-                       ip += 3;
+                       mono_value_copy_internal ((char*)o + ip [3], locals + ip [2], klass);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_STRMFLD) {
                        MonoClassField *field;
 
-                       MonoObject* const o = sp [-2].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*);
                        NULL_CHECK (o);
                        
-                       field = (MonoClassField*)frame->imethod->data_items[ip [1]];
-                       ip += 2;
-
+                       field = (MonoClassField*)frame->imethod->data_items [ip [3]];
 #ifndef DISABLE_REMOTING
                        if (mono_object_is_transparent_proxy (o)) {
                                MonoClass *klass = ((MonoTransparentProxy*)o)->remote_class->proxy_class;
-                               mono_store_remote_field_checked (o, klass, field, &sp [-1].data, error);
+                               mono_store_remote_field_checked (o, klass, field, locals + ip [2], error);
                                mono_interp_error_cleanup (error); /* FIXME: don't swallow the error */
                        } else
 #endif
-                               stackval_to_data (field->type, &sp [-1], (char*)o + field->offset, FALSE);
+                               stackval_to_data (field->type, (stackval*)(locals + ip [2]), (char*)o + field->offset, FALSE);
 
-                       sp -= 2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_STRMFLD_VT) {
-                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [1]];
+                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [3]];
                        MonoClass *klass = mono_class_from_mono_type_internal (field->type);
-                       int vtsize = mono_class_value_size (klass, NULL);
 
-                       sp = STACK_SUB_BYTES (sp, vtsize + MINT_STACK_SLOT_SIZE);
-                       MonoObject *o = sp [0].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [1], MonoObject*);
                        NULL_CHECK (o);
 
 #ifndef DISABLE_REMOTING
                        if (mono_object_is_transparent_proxy (o)) {
                                MonoClass *klass = ((MonoTransparentProxy*)o)->remote_class->proxy_class;
-                               mono_store_remote_field_checked (o, klass, field, sp + 1, error);
+                               mono_store_remote_field_checked (o, klass, field, locals + ip [2], error);
                                mono_interp_error_cleanup (error); /* FIXME: don't swallow the error */
                        } else
 #endif
-                               mono_value_copy_internal ((char *) o + field->offset, sp + 1, klass);
+                               mono_value_copy_internal ((char *) o + field->offset, locals + ip [2], klass);
 
-                       ip += 2;
-                       MINT_IN_BREAK;
-               }
-
-#define STLOCFLD(datamem, fieldtype) do { \
-       MonoObject *o = *(MonoObject**)(locals + ip [1]); \
-       NULL_CHECK (o); \
-       sp--; \
-       * (fieldtype *)((char *)o + ip [2]) = sp [0].data.datamem; \
-       ip += 3; \
-} while (0)
-               MINT_IN_CASE(MINT_STLOCFLD_I1) STLOCFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_U1) STLOCFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_I2) STLOCFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_U2) STLOCFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_I4) STLOCFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_I8) STLOCFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_R4) STLOCFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_R8) STLOCFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOCFLD_O) {
-                       MonoObject *o = *(MonoObject**)(locals + ip [1]);
-                       NULL_CHECK (o);
-                       sp--;
-                       mono_gc_wbarrier_set_field_internal (o, (char *) o + ip [2], sp [0].data.o);
-                       ip += 3;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_LDSFLDA) {
-                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [2]];
                        INIT_VTABLE (vtable);
-                       sp->data.p = frame->imethod->data_items [ip [2]];
-                       ip += 3;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gpointer) = frame->imethod->data_items [ip [3]];
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_LDSSFLDA) {
-                       guint32 offset = READ32(ip + 1);
-                       sp->data.p = mono_get_special_static_data (offset);
-                       ip += 3;
-                       ++sp;
+                       guint32 offset = READ32(ip + 2);
+                       LOCAL_VAR (ip [1], gpointer) = mono_get_special_static_data (offset);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
 /* We init class here to preserve cctor order */
-#define LDSFLD(datamem, fieldtype) { \
-       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [1]]; \
+#define LDSFLD(datatype, fieldtype) { \
+       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [2]]; \
        INIT_VTABLE (vtable); \
-       sp[0].data.datamem = * (fieldtype *)(frame->imethod->data_items [ip [2]]) ; \
-       ip += 3; \
-       sp++; \
+       LOCAL_VAR (ip [1], datatype) = * (fieldtype *)(frame->imethod->data_items [ip [3]]) ; \
+       ip += 4; \
        }
 
-               MINT_IN_CASE(MINT_LDSFLD_I1) LDSFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_U1) LDSFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_I2) LDSFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_U2) LDSFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_I4) LDSFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_I8) LDSFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_R4) LDSFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_R8) LDSFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDSFLD_O) LDSFLD(p, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_I1) LDSFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_U1) LDSFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_I2) LDSFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_U2) LDSFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_I4) LDSFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_I8) LDSFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_R4) LDSFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_R8) LDSFLD(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDSFLD_O) LDSFLD(gpointer, gpointer); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_LDSFLD_VT) {
-                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [2]];
                        INIT_VTABLE (vtable);
 
-                       gpointer addr = frame->imethod->data_items [ip [2]];
-                       int const i32 = READ32 (ip + 3);
+                       gpointer addr = frame->imethod->data_items [ip [3]];
+                       guint16 size = ip [4];
 
-                       memcpy (sp, addr, i32);
-                       sp = STACK_ADD_BYTES (sp, i32);
+                       memcpy (locals + ip [1], addr, size);
                        ip += 5;
                        MINT_IN_BREAK;
                }
 
-#define LDTSFLD(datamem, fieldtype) { \
+#define LDTSFLD(datatype, fieldtype) { \
        MonoInternalThread *thread = mono_thread_internal_current (); \
-       guint32 offset = READ32 (ip + 1); \
+       guint32 offset = READ32 (ip + 2); \
        gpointer addr = ((char*)thread->static_data [offset & 0x3f]) + (offset >> 6); \
-       sp[0].data.datamem = *(fieldtype*)addr; \
-       ip += 3; \
-       ++sp; \
-       }
-               MINT_IN_CASE(MINT_LDTSFLD_I1) LDTSFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_U1) LDTSFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_I2) LDTSFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_U2) LDTSFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_I4) LDTSFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_I8) LDTSFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_R4) LDTSFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_R8) LDTSFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDTSFLD_O) LDTSFLD(p, gpointer); MINT_IN_BREAK;
+       LOCAL_VAR (ip [1], datatype) = *(fieldtype*)addr; \
+       ip += 4; \
+       }
+               MINT_IN_CASE(MINT_LDTSFLD_I1) LDTSFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_U1) LDTSFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_I2) LDTSFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_U2) LDTSFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_I4) LDTSFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_I8) LDTSFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_R4) LDTSFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_R8) LDTSFLD(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDTSFLD_O) LDTSFLD(gpointer, gpointer); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_LDSSFLD) {
-                       guint32 offset = READ32(ip + 2);
+                       guint32 offset = READ32(ip + 3);
                        gpointer addr = mono_get_special_static_data (offset);
-                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [1]];
-                       stackval_from_data (field->type, sp, addr, FALSE);
-                       ip += 4;
-                       ++sp;
+                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [2]];
+                       stackval_from_data (field->type, (stackval*)(locals + ip [1]), addr, FALSE);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDSSFLD_VT) {
-                       guint32 offset = READ32(ip + 1);
+                       guint32 offset = READ32(ip + 2);
                        gpointer addr = mono_get_special_static_data (offset);
-
-                       int size = READ32 (ip + 3);
-                       memcpy (sp, addr, size);
-                       sp = STACK_ADD_BYTES (sp, size);
+                       memcpy (locals + ip [1], addr, ip [4]);
                        ip += 5;
                        MINT_IN_BREAK;
                }
-#define STSFLD(datamem, fieldtype) { \
-       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [1]]; \
+#define STSFLD(datatype, fieldtype) { \
+       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [2]]; \
        INIT_VTABLE (vtable); \
-       sp --; \
-       * (fieldtype *)(frame->imethod->data_items [ip [2]]) = sp[0].data.datamem; \
-       ip += 3; \
+       * (fieldtype *)(frame->imethod->data_items [ip [3]]) = LOCAL_VAR (ip [1], datatype); \
+       ip += 4; \
        }
 
-               MINT_IN_CASE(MINT_STSFLD_I1) STSFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_U1) STSFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_I2) STSFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_U2) STSFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_I4) STSFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_I8) STSFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_R4) STSFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_R8) STSFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STSFLD_O) STSFLD(p, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_I1) STSFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_U1) STSFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_I2) STSFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_U2) STSFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_I4) STSFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_I8) STSFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_R4) STSFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_R8) STSFLD(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STSFLD_O) STSFLD(gpointer, gpointer); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_STSFLD_VT) {
-                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*) frame->imethod->data_items [ip [2]];
                        INIT_VTABLE (vtable);
-                       int const i32 = READ32 (ip + 3);
-                       gpointer addr = frame->imethod->data_items [ip [2]];
-
-                       sp = STACK_SUB_BYTES (sp, i32);
-                       memcpy (addr, sp, i32);
-
+                       gpointer addr = frame->imethod->data_items [ip [3]];
+                       memcpy (addr, locals + ip [1], ip [4]);
                        ip += 5;
                        MINT_IN_BREAK;
                }
 
-#define STTSFLD(datamem, fieldtype) { \
+#define STTSFLD(datatype, fieldtype) { \
        MonoInternalThread *thread = mono_thread_internal_current (); \
-       guint32 offset = READ32 (ip + 1); \
+       guint32 offset = READ32 (ip + 2); \
        gpointer addr = ((char*)thread->static_data [offset & 0x3f]) + (offset >> 6); \
-       sp--; \
-       *(fieldtype*)addr = sp[0].data.datamem; \
-       ip += 3; \
+       *(fieldtype*)addr = LOCAL_VAR (ip [1], datatype); \
+       ip += 4; \
        }
 
-               MINT_IN_CASE(MINT_STTSFLD_I1) STTSFLD(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_U1) STTSFLD(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_I2) STTSFLD(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_U2) STTSFLD(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_I4) STTSFLD(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_I8) STTSFLD(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_R4) STTSFLD(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_R8) STTSFLD(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STTSFLD_O) STTSFLD(p, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_I1) STTSFLD(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_U1) STTSFLD(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_I2) STTSFLD(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_U2) STTSFLD(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_I4) STTSFLD(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_I8) STTSFLD(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_R4) STTSFLD(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_R8) STTSFLD(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STTSFLD_O) STTSFLD(gpointer, gpointer); MINT_IN_BREAK;
 
                MINT_IN_CASE(MINT_STSSFLD) {
-                       guint32 offset = READ32(ip + 2);
+                       guint32 offset = READ32(ip + 3);
                        gpointer addr = mono_get_special_static_data (offset);
-                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [1]];
-                       --sp;
-                       stackval_to_data (field->type, sp, addr, FALSE);
-                       ip += 4;
+                       MonoClassField *field = (MonoClassField*)frame->imethod->data_items [ip [2]];
+                       stackval_to_data (field->type, (stackval*)(locals + ip [1]), addr, FALSE);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_STSSFLD_VT) {
-                       guint32 offset = READ32(ip + 1);
+                       guint32 offset = READ32(ip + 2);
                        gpointer addr = mono_get_special_static_data (offset);
-                       int size = READ32 (ip + 3);
-
-                       sp = STACK_SUB_BYTES (sp, size);
-                       memcpy (addr, sp, size);
-
+                       memcpy (addr, locals + ip [1], ip [4]);
                        ip += 5;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_STOBJ_VT) {
-                       MonoClass *c = (MonoClass*)frame->imethod->data_items[ip [1]];
-                       int size = mono_class_value_size (c, NULL);
-
-                       sp = STACK_SUB_BYTES (sp, MINT_STACK_SLOT_SIZE + size);
-                       mono_value_copy_internal (sp [0].data.p, sp + 1, c);
-
-                       ip += 2;
+                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
+                       mono_value_copy_internal (LOCAL_VAR (ip [1], gpointer), locals + ip [2], c);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_CONV_OVF_I4_UN_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXINT32)
+               MINT_IN_CASE(MINT_CONV_OVF_I4_UN_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXINT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint32)sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32)val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U8_I4)
-                       if (sp [-1].data.i < 0)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U8_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = sp [-1].data.i;
-                       ++ip;
+                       LOCAL_VAR (ip [1], guint64) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U8_I8)
-                       if (sp [-1].data.l < 0)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U8_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], guint64) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I8_U8)
-                       if ((guint64) sp [-1].data.l > G_MAXINT64)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I8_U8) {
+                       guint64 val = LOCAL_VAR (ip [2], guint64);
+                       if (val > G_MAXINT64)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_CONV_OVF_U8_R4) {
-                       guint64 res = (guint64)sp [-1].data.f_r4;
-                       if (mono_isnan (sp [-1].data.f_r4) || mono_trunc (sp [-1].data.f_r4) != res)
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (mono_isnan (val) || mono_trunc (val) != (guint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], guint64) = (guint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_OVF_U8_R8) {
-                       guint64 res = (guint64)sp [-1].data.f;
-                       if (mono_isnan (sp [-1].data.f) || mono_trunc (sp [-1].data.f) != res)
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (mono_isnan (val) || mono_trunc (val) != (guint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], guint64) = (guint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_OVF_I8_UN_R8) {
-                       gint64 res = (gint64)sp [-1].data.f;
-                       if (res < 0 || mono_isnan (sp [-1].data.f) || mono_trunc (sp [-1].data.f) != res)
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || mono_isnan (val) || mono_trunc (val) != (gint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_OVF_I8_UN_R4) {
-                       gint64 res = (gint64)sp [-1].data.f_r4;
-                       if (res < 0 || mono_isnan (sp [-1].data.f_r4) || mono_trunc (sp [-1].data.f_r4) != res)
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < 0 || mono_isnan (val) || mono_trunc (val) != (gint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_OVF_I8_R4) {
-                       gint64 res = (gint64)sp [-1].data.f_r4;
-                       if (mono_isnan (sp [-1].data.f_r4) || mono_trunc (sp [-1].data.f_r4) != res)
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (mono_isnan (val) || mono_trunc (val) != (gint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_CONV_OVF_I8_R8) {
-                       gint64 res = (gint64)sp [-1].data.f;
-                       if (mono_isnan (sp [-1].data.f) || mono_trunc (sp [-1].data.f) != res)
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (mono_isnan (val) || mono_trunc (val) != (gint64)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.l = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint64) = (gint64)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_BOX) {
-                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
 
                        MonoObject *o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (vtable->klass));
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, o);
-                       stackval_to_data (m_class_get_byval_arg (vtable->klass), &sp [-1], mono_object_get_data (o), FALSE);
+                       stackval_to_data (m_class_get_byval_arg (vtable->klass), (stackval*)(locals + ip [2]), mono_object_get_data (o), FALSE);
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, NULL);
 
-                       sp [-1].data.o = o;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], MonoObject*) = o;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_BOX_VT) {
-                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
                        MonoClass *c = vtable->klass;
 
-                       int size = mono_class_value_size (c, NULL);
-
                        MonoObject* o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (c));
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, o);
-
-                       sp = STACK_SUB_BYTES (sp, size);
-                       mono_value_copy_internal (mono_object_get_data (o), sp, c);
+                       mono_value_copy_internal (mono_object_get_data (o), locals + ip [2], c);
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, NULL);
 
-                       sp [0].data.o = o;
-                       sp++;
-
-                       ip += 2;
+                       LOCAL_VAR (ip [1], MonoObject*) = o;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_BOX_PTR) {
-                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [1]];
+                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
                        MonoClass *c = vtable->klass;
-                       // This doesn't follow the current stack based design, but we plan to switch to explicit offsets.
-                       stackval *sp_ptr = (stackval*)(locals + frame->imethod->total_locals_size + ip [2]);
 
                        MonoObject* o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (c));
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, o);
-                       mono_value_copy_internal (mono_object_get_data (o), sp_ptr->data.p, c);
+                       mono_value_copy_internal (mono_object_get_data (o), LOCAL_VAR (ip [2], gpointer), c);
                        MONO_HANDLE_ASSIGN_RAW (tmp_handle, NULL);
 
-                       sp_ptr->data.o = o;
-                       ip += 3;
+                       LOCAL_VAR (ip [1], MonoObject*) = o;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_BOX_NULLABLE_PTR) {
-                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [1]];
-                       // This doesn't follow the current stack based design, but we plan to switch to explicit offsets.
-                       stackval *sp_ptr = (stackval*)(locals + frame->imethod->total_locals_size + ip [2]);
+                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
 
-                       sp_ptr->data.o = mono_nullable_box (sp_ptr->data.p, c, error);
+                       LOCAL_VAR (ip [1], MonoObject*) = mono_nullable_box (LOCAL_VAR (ip [2], gpointer), c, error);
                        mono_interp_error_cleanup (error); /* FIXME: don't swallow the error */
-                       ip += 3;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_NEWARR) {
-                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items[ip [1]];
-                       sp [-1].data.o = (MonoObject*) mono_array_new_specific_checked (vtable, sp [-1].data.i, error);
+                       MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
+                       LOCAL_VAR (ip [1], MonoObject*) = (MonoObject*) mono_array_new_specific_checked (vtable, LOCAL_VAR (ip [2], gint32), error);
                        if (!is_ok (error)) {
                                THROW_EX (mono_error_convert_to_exception (error), ip);
                        }
-                       ip += 2;
+                       ip += 4;
                        /*if (profiling_classes) {
                                guint count = GPOINTER_TO_UINT (g_hash_table_lookup (profiling_classes, o->vtable->klass));
                                count++;
@@ -5778,109 +5619,103 @@ call:
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDLEN) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp [-1].data.nati = mono_array_length_internal ((MonoArray *)o);
-                       ++ip;
+                       LOCAL_VAR (ip [1], mono_u) = mono_array_length_internal ((MonoArray *)o);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDLEN_SPAN) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       gsize offset_length = (gsize)(gint16)ip [1];
-                       sp [-1].data.nati = *(gint32 *) ((guint8 *) o + offset_length);
-                       ip += 2;
+                       // FIXME What's the point of this opcode ? It's just a LDFLD
+                       gsize offset_length = (gsize)(gint16)ip [3];
+                       LOCAL_VAR (ip [1], mono_u) = *(gint32 *) ((guint8 *) o + offset_length);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_GETCHR) {
-                       MonoString *s;
-                       s = (MonoString*)sp [-2].data.p;
+                       MonoString *s = LOCAL_VAR (ip [2], MonoString*);
                        NULL_CHECK (s);
-                       int const i32 = sp [-1].data.i;
+                       int i32 = LOCAL_VAR (ip [3], int);
                        if (i32 < 0 || i32 >= mono_string_length_internal (s))
                                THROW_EX (mono_get_exception_index_out_of_range (), ip);
-                       --sp;
-                       sp [-1].data.i = mono_string_chars_internal (s)[i32];
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = mono_string_chars_internal (s)[i32];
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_GETITEM_SPAN) {
-                       guint8 * const span = (guint8 *) sp [-2].data.p;
-                       const int index = sp [-1].data.i;
-                       sp--;
-
+                       guint8 *span = LOCAL_VAR (ip [2], guint8*);
+                       int index = LOCAL_VAR (ip [3], int);
                        NULL_CHECK (span);
 
-                       const gsize offset_length = (gsize)(gint16)ip [2];
+                       gsize offset_length = (gsize)(gint16)ip [5];
 
                        const gint32 length = *(gint32 *) (span + offset_length);
                        if (index < 0 || index >= length)
                                THROW_EX (mono_get_exception_index_out_of_range (), ip);
 
-                       const gsize element_size = (gsize)(gint16)ip [1];
-                       const gsize offset_pointer = (gsize)(gint16)ip [3];
+                       gsize element_size = (gsize)(gint16)ip [4];
+                       gsize offset_pointer = (gsize)(gint16)ip [6];
 
                        const gpointer pointer = *(gpointer *)(span + offset_pointer);
-                       sp [-1].data.p = (guint8 *) pointer + index * element_size;
+                       LOCAL_VAR (ip [1], gpointer) = (guint8 *) pointer + index * element_size;
 
-                       ip += 4;
+                       ip += 7;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_STRLEN) {
-                       ++ip;
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp [-1].data.i = mono_string_length_internal ((MonoString*) o);
+                       LOCAL_VAR (ip [1], gint32) = mono_string_length_internal ((MonoString*) o);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_ARRAY_RANK) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp [-1].data.i = m_class_get_rank (mono_object_class (o));
-                       ip++;
+                       LOCAL_VAR (ip [1], gint32) = m_class_get_rank (mono_object_class (o));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_ARRAY_ELEMENT_SIZE) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp [-1].data.i = mono_array_element_size (mono_object_class (o));
-                       ip++;
+                       LOCAL_VAR (ip [1], gint32) = mono_array_element_size (mono_object_class (o));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_ARRAY_IS_PRIMITIVE) {
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
                        NULL_CHECK (o);
-                       sp [-1].data.i = m_class_is_primitive (m_class_get_element_class (mono_object_class (o)));
-                       ip++;
+                       LOCAL_VAR (ip [1], gint32) = m_class_is_primitive (m_class_get_element_class (mono_object_class (o)));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDELEMA1) {
                        /* No bounds, one direction */
-                       MonoArray *ao = (MonoArray*)sp [-2].data.o;
+                       MonoArray *ao = LOCAL_VAR (ip [2], MonoArray*);
                        NULL_CHECK (ao);
-                       gint32 const index = sp [-1].data.i;
+                       gint32 index = LOCAL_VAR (ip [3], gint32);
                        if (index >= ao->max_length)
                                THROW_EX (mono_get_exception_index_out_of_range (), ip);
-                       gint32 const size = READ32 (ip + 1);
-                       sp [-2].data.p = mono_array_addr_with_size_fast (ao, size, index);
-                       ip += 3;
-                       sp --;
-
+                       guint16 size = ip [4];
+                       LOCAL_VAR (ip [1], gpointer) = mono_array_addr_with_size_fast (ao, size, index);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDELEMA) {
-                       guint16 rank = ip [1];
-                       gint32 const esize = READ32 (ip + 2);
-                       ip += 4;
-                       sp -= rank;
+                       guint16 rank = ip [2];
+                       guint16 esize = ip [3];
+                       stackval *sp = (stackval*)(locals + ip [1]);
 
-                       MonoArray* const ao = (MonoArray*) sp [-1].data.o;
+                       MonoArray *ao = (MonoArray*) sp [0].data.o;
                        NULL_CHECK (ao);
 
                        g_assert (ao->bounds);
                        guint32 pos = 0;
                        for (int i = 0; i < rank; i++) {
-                               gint32 idx = sp [i].data.i;
+                               gint32 idx = sp [i + 1].data.i;
                                gint32 lower = ao->bounds [i].lower_bound;
                                guint32 len = ao->bounds [i].length;
                                if (idx < lower || (guint32)(idx - lower) >= len)
@@ -5888,420 +5723,527 @@ call:
                                pos = (pos * len) + (guint32)(idx - lower);
                        }
 
-                       sp [-1].data.p = mono_array_addr_with_size_fast (ao, esize, pos);
+                       sp [0].data.p = mono_array_addr_with_size_fast (ao, esize, pos);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDELEMA_TC) {
-                       guint16 rank = ip [1];
-                       ip += 3;
-                       sp -= rank;
+                       stackval *sp = (stackval*)(locals + ip [1]);
 
-                       MonoObject* const o = sp [-1].data.o;
+                       MonoObject *o = (MonoObject*) sp [0].data.o;
                        NULL_CHECK (o);
 
-                       MonoClass *klass = (MonoClass*)frame->imethod->data_items [ip [-3 + 2]];
-                       const gboolean needs_typecheck = ip [-3] == MINT_LDELEMA_TC;
-                       MonoException *ex = ves_array_element_address (frame, klass, (MonoArray *) o, sp, needs_typecheck);
+                       MonoClass *klass = (MonoClass*)frame->imethod->data_items [ip [2]];
+                       MonoException *ex = ves_array_element_address (frame, klass, (MonoArray *) o, sp + 1, TRUE);
                        if (ex)
                                THROW_EX (ex, ip);
+                       ip += 3;
                        MINT_IN_BREAK;
                }
 
-#define LDELEM(datamem,elemtype) do { \
-       sp--; \
-       MonoArray *o = (MonoArray*)sp [-1].data.p; \
+#define LDELEM(datatype,elemtype) do { \
+       MonoArray *o = LOCAL_VAR (ip [2], MonoArray*); \
        NULL_CHECK (o); \
-       gint32 aindex = sp [0].data.i; \
+       gint32 aindex = LOCAL_VAR (ip [3], gint32); \
        if (aindex >= mono_array_length_internal (o)) \
                THROW_EX (mono_get_exception_index_out_of_range (), ip); \
-       sp [-1].data.datamem = mono_array_get_fast (o, elemtype, aindex); \
-       ip++; \
+       LOCAL_VAR (ip [1], datatype) = mono_array_get_fast (o, elemtype, aindex); \
+       ip += 4; \
 } while (0)
-               MINT_IN_CASE(MINT_LDELEM_I1) LDELEM(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_U1) LDELEM(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_I2) LDELEM(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_U2) LDELEM(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_I4) LDELEM(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_U4) LDELEM(i, guint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_I8) LDELEM(l, guint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_I)  LDELEM(nati, mono_i); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_R4) LDELEM(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_R8) LDELEM(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDELEM_REF) LDELEM(p, gpointer); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_I1) LDELEM(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_U1) LDELEM(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_I2) LDELEM(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_U2) LDELEM(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_I4) LDELEM(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_U4) LDELEM(gint32, guint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_I8) LDELEM(gint64, guint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_I)  LDELEM(mono_u, mono_i); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_R4) LDELEM(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_R8) LDELEM(double, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDELEM_REF) LDELEM(gpointer, gpointer); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDELEM_VT) {
-                       sp -= 2;
-                       MonoArray *o = (MonoArray*)sp [0].data.p;
+                       MonoArray *o = LOCAL_VAR (ip [2], MonoArray*);
                        NULL_CHECK (o);
-                       mono_u aindex = sp [1].data.i;
+                       mono_u aindex = LOCAL_VAR (ip [3], gint32);
                        if (aindex >= mono_array_length_internal (o))
                                THROW_EX (mono_get_exception_index_out_of_range (), ip);
 
-                       int i32 = READ32 (ip + 1);
-                       char *src_addr = mono_array_addr_with_size_fast ((MonoArray *) o, i32, aindex);
-                       memcpy (sp, src_addr, i32);
-                       sp = STACK_ADD_BYTES (sp, i32);
+                       guint16 size = ip [4];
+                       char *src_addr = mono_array_addr_with_size_fast ((MonoArray *) o, size, aindex);
+                       memcpy (locals + ip [1], src_addr, size);
 
-                       ip += 3;
+                       ip += 5;
                        MINT_IN_BREAK;
                }
 #define STELEM_PROLOG(o, aindex) do { \
-       sp -= 3; \
-       o = (MonoArray*)sp [0].data.p; \
+       o = LOCAL_VAR (ip [1], MonoArray*); \
        NULL_CHECK (o); \
-       aindex = sp [1].data.i; \
+       aindex = LOCAL_VAR (ip [2], gint32); \
        if (aindex >= mono_array_length_internal (o)) \
                THROW_EX (mono_get_exception_index_out_of_range (), ip); \
 } while (0)
 
-#define STELEM(datamem,elemtype) do { \
+#define STELEM(datatype, elemtype) do { \
        MonoArray *o; \
        gint32 aindex; \
        STELEM_PROLOG(o, aindex); \
-       mono_array_set_fast (o, elemtype, aindex, sp [2].data.datamem); \
-       ip++; \
+       mono_array_set_fast (o, elemtype, aindex, LOCAL_VAR (ip [3], datatype)); \
+       ip += 4; \
 } while (0)
-               MINT_IN_CASE(MINT_STELEM_I1) STELEM(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_U1) STELEM(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_I2) STELEM(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_U2) STELEM(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_I4) STELEM(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_I8) STELEM(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_I)  STELEM(nati, mono_i); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_R4) STELEM(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STELEM_R8) STELEM(f, double); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_I1) STELEM(gint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_U1) STELEM(gint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_I2) STELEM(gint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_U2) STELEM(gint32, guint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_I4) STELEM(gint32, gint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_I8) STELEM(gint64, gint64); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_I)  STELEM(mono_u, mono_i); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_R4) STELEM(float, float); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_STELEM_R8) STELEM(double, double); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_STELEM_REF) {
                        MonoArray *o;
                        gint32 aindex;
                        STELEM_PROLOG(o, aindex);
+                       MonoObject *ref = LOCAL_VAR (ip [3], MonoObject*);
 
-                       if (sp [2].data.o) {
-                               gboolean isinst = mono_interp_isinst (sp [2].data.o, m_class_get_element_class (mono_object_class (o)));
+                       if (ref) {
+                               gboolean isinst = mono_interp_isinst (ref, m_class_get_element_class (mono_object_class (o)));
                                if (!isinst)
                                        THROW_EX (mono_get_exception_array_type_mismatch (), ip);
                        }
-                       mono_array_setref_fast ((MonoArray *) o, aindex, sp [2].data.p);
-                       ip++;
+                       mono_array_setref_fast ((MonoArray *) o, aindex, ref);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_STELEM_VT) {
-                       int i32 = READ32 (ip + 2);
-                       sp = STACK_SUB_BYTES (sp, 2 * MINT_STACK_SLOT_SIZE + i32);
-                       MonoArray *o = (MonoArray*)sp [0].data.p;
+                       MonoArray *o = LOCAL_VAR (ip [1], MonoArray*);
                        NULL_CHECK (o);
-                       gint32 aindex = sp [1].data.i;
+                       gint32 aindex = LOCAL_VAR (ip [2], gint32);
                        if (aindex >= mono_array_length_internal (o))
                                THROW_EX (mono_get_exception_index_out_of_range (), ip);
 
-                       char *dst_addr = mono_array_addr_with_size_fast ((MonoArray *) o, i32, aindex);
-                       MonoClass *klass_vt = (MonoClass*)frame->imethod->data_items [ip [1]];
-                       mono_value_copy_internal (dst_addr, sp + 2, klass_vt);
-                       ip += 4;
+                       guint16 size = ip [5];
+                       char *dst_addr = mono_array_addr_with_size_fast ((MonoArray *) o, size, aindex);
+                       MonoClass *klass_vt = (MonoClass*)frame->imethod->data_items [ip [4]];
+                       mono_value_copy_internal (dst_addr, locals + ip [3], klass_vt);
+                       ip += 6;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_CONV_OVF_I4_U4)
-                       if (sp [-1].data.i < 0)
+               MINT_IN_CASE(MINT_CONV_OVF_I4_U4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I4_I8)
-                       if (sp [-1].data.l < G_MININT32 || sp [-1].data.l > G_MAXINT32)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I4_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < G_MININT32 || val > G_MAXINT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint32) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I4_U8)
-                       if ((guint64)sp [-1].data.l > G_MAXINT32)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I4_U8) {
+                       guint64 val = LOCAL_VAR (ip [2], guint64);
+                       if (val > G_MAXINT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint32) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) val;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_CONV_OVF_I4_R4) {
-                       gint32 res = (gint32)sp [-1].data.f_r4;
-                       if (mono_isnan (sp [-1].data.f_r4) || mono_trunc (sp [-1].data.f_r4) != res)
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (mono_isnan (val) || mono_trunc (val) != (gint32)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32) val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_CONV_OVF_I4_R8)
-                       if (sp [-1].data.f < G_MININT32 || sp [-1].data.f > G_MAXINT32 || isnan (sp [-1].data.f))
+               MINT_IN_CASE(MINT_CONV_OVF_I4_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < G_MININT32 || val > G_MAXINT32 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint32)val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U4_I4)
-                       if (sp [-1].data.i < 0)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U4_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U4_I8)
-                       if (sp [-1].data.l < 0 || sp [-1].data.l > G_MAXUINT32)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U4_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0 || val > G_MAXUINT32)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint32) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint32) val;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_CONV_OVF_U4_R4) {
-                       guint32 res = (guint32)sp [-1].data.f_r4;
-                       if (mono_isnan (sp [-1].data.f_r4) || mono_trunc (sp [-1].data.f_r4) != res)
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (mono_isnan (val) || mono_trunc (val) != (guint32)val)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = res;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint32)val;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
-               MINT_IN_CASE(MINT_CONV_OVF_U4_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXUINT32 || isnan (sp [-1].data.f))
+               MINT_IN_CASE(MINT_CONV_OVF_U4_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXUINT32 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint32) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint32) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_I4)
-                       if (sp [-1].data.i < G_MININT16 || sp [-1].data.i > G_MAXINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < G_MININT16 || val > G_MAXINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16)val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_U4)
-                       if (sp [-1].data.i < 0 || sp [-1].data.i > G_MAXINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_U4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0 || val > G_MAXINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16)val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_I8)
-                       if (sp [-1].data.l < G_MININT16 || sp [-1].data.l > G_MAXINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < G_MININT16 || val > G_MAXINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_U8)
-                       if (sp [-1].data.l < 0 || sp [-1].data.l > G_MAXINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_U8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0 || val > G_MAXINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_R4)
-                       if (sp [-1].data.f_r4 < G_MININT16 || sp [-1].data.f_r4 > G_MAXINT16 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < G_MININT16 || val > G_MAXINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_R8)
-                       if (sp [-1].data.f < G_MININT16 || sp [-1].data.f > G_MAXINT16 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < G_MININT16 || val > G_MAXINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_UN_R4)
-                       if (sp [-1].data.f_r4 < 0 || sp [-1].data.f_r4 > G_MAXINT16 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_UN_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < 0 || val > G_MAXINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I2_UN_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXINT16 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I2_UN_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint16) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint16) val;
+                       /* Advance past opcode, ip [1] (destination) and ip [2] (source), like the sibling CONV_OVF cases; without this the same instruction would be dispatched again. */
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U2_I4)
-                       if (sp [-1].data.i < 0 || sp [-1].data.i > G_MAXUINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U2_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0 || val > G_MAXUINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U2_I8)
-                       if (sp [-1].data.l < 0 || sp [-1].data.l > G_MAXUINT16)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U2_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0 || val > G_MAXUINT16)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint16) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U2_R4)
-                       if (sp [-1].data.f_r4 < 0 || sp [-1].data.f_r4 > G_MAXUINT16 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U2_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < 0 || val > G_MAXUINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint16) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U2_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXUINT16 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U2_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXUINT16 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint16) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint16) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_I4)
-                       if (sp [-1].data.i < G_MININT8 || sp [-1].data.i > G_MAXINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < G_MININT8 || val > G_MAXINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_U4)
-                       if (sp [-1].data.i < 0 || sp [-1].data.i > G_MAXINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_U4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0 || val > G_MAXINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_I8)
-                       if (sp [-1].data.l < G_MININT8 || sp [-1].data.l > G_MAXINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < G_MININT8 || val > G_MAXINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_U8)
-                       if (sp [-1].data.l < 0 || sp [-1].data.l > G_MAXINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_U8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0 || val > G_MAXINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_R4)
-                       if (sp [-1].data.f_r4 < G_MININT8 || sp [-1].data.f_r4 > G_MAXINT8 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < G_MININT8 || val > G_MAXINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_R8)
-                       if (sp [-1].data.f < G_MININT8 || sp [-1].data.f > G_MAXINT8 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < G_MININT8 || val > G_MAXINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_UN_R4)
-                       if (sp [-1].data.f_r4 < 0 || sp [-1].data.f_r4 > G_MAXINT8 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_UN_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < 0 || val > G_MAXINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_I1_UN_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXINT8 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_I1_UN_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (gint8) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (gint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U1_I4)
-                       if (sp [-1].data.i < 0 || sp [-1].data.i > G_MAXUINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U1_I4) {
+                       gint32 val = LOCAL_VAR (ip [2], gint32);
+                       if (val < 0 || val > G_MAXUINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U1_I8)
-                       if (sp [-1].data.l < 0 || sp [-1].data.l > G_MAXUINT8)
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U1_I8) {
+                       gint64 val = LOCAL_VAR (ip [2], gint64);
+                       if (val < 0 || val > G_MAXUINT8)
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint8) sp [-1].data.l;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U1_R4)
-                       if (sp [-1].data.f_r4 < 0 || sp [-1].data.f_r4 > G_MAXUINT8 || isnan (sp [-1].data.f_r4))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U1_R4) {
+                       float val = LOCAL_VAR (ip [2], float);
+                       if (val < 0 || val > G_MAXUINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint8) sp [-1].data.f_r4;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CONV_OVF_U1_R8)
-                       if (sp [-1].data.f < 0 || sp [-1].data.f > G_MAXUINT8 || isnan (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CONV_OVF_U1_R8) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (val < 0 || val > G_MAXUINT8 || isnan (val))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       sp [-1].data.i = (guint8) sp [-1].data.f;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (guint8) val;
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CKFINITE)
-                       if (!mono_isfinite (sp [-1].data.f))
+               }
+               MINT_IN_CASE(MINT_CKFINITE) {
+                       double val = LOCAL_VAR (ip [2], double);
+                       if (!mono_isfinite (val))
                                THROW_EX (mono_get_exception_arithmetic (), ip);
-                       ++ip;
+                       LOCAL_VAR (ip [1], double) = val;
+                       ip += 3;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_MKREFANY) {
-                       MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [1]];
+                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
 
-                       sp--;
-                       /* The value address is on the stack */
-                       gpointer addr = sp [0].data.p;
-                       /* Push the typedref value on the stack */
-                       MonoTypedRef *tref = (MonoTypedRef*)sp;
+                       gpointer addr = LOCAL_VAR (ip [2], gpointer);
+                       /* Write the typedref value */
+                       MonoTypedRef *tref = (MonoTypedRef*)(locals + ip [1]);
                        tref->klass = c;
                        tref->type = m_class_get_byval_arg (c);
                        tref->value = addr;
 
-                       sp = STACK_ADD_BYTES (sp, sizeof (MonoTypedRef));
-                       ip += 2;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_REFANYTYPE) {
-                       sp = STACK_SUB_BYTES (sp, sizeof (MonoTypedRef));
-                       MonoTypedRef *tref = (MonoTypedRef*)sp;
+                       MonoTypedRef *tref = (MonoTypedRef*)(locals + ip [2]);
 
-                       sp [0].data.p = tref->type;
-                       sp++;
-                       ip++;
+                       LOCAL_VAR (ip [1], gpointer) = tref->type;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_REFANYVAL) {
-                       sp = STACK_SUB_BYTES (sp, sizeof (MonoTypedRef));
-                       MonoTypedRef *tref = (MonoTypedRef*)sp;
+                       MonoTypedRef *tref = (MonoTypedRef*)(locals + ip [2]);
 
-                       MonoClass* const c = (MonoClass*)frame->imethod->data_items [ip [1]];
+                       MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
                        if (c != tref->klass)
                                THROW_EX (mono_get_exception_invalid_cast (), ip);
 
-                       sp [0].data.p = tref->value;
-                       sp++;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = tref->value;
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDTOKEN)
                        // FIXME same as MINT_MONO_LDPTR
-                       sp->data.p = frame->imethod->data_items [ip [1]];
-                       sp++;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = frame->imethod->data_items [ip [2]];
+                       ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_ADD_OVF_I4)
-                       if (CHECK_ADD_OVERFLOW (sp [-2].data.i, sp [-1].data.i))
+               MINT_IN_CASE(MINT_ADD_OVF_I4) {
+                       gint32 i1 = LOCAL_VAR (ip [2], gint32);
+                       gint32 i2 = LOCAL_VAR (ip [3], gint32);
+                       if (CHECK_ADD_OVERFLOW (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(i, +);
+                       LOCAL_VAR (ip [1], gint32) = i1 + i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_ADD_OVF_I8)
-                       if (CHECK_ADD_OVERFLOW64 (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_ADD_OVF_I8) {
+                       gint64 l1 = LOCAL_VAR (ip [2], gint64);
+                       gint64 l2 = LOCAL_VAR (ip [3], gint64);
+                       if (CHECK_ADD_OVERFLOW64 (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(l, +);
+                       LOCAL_VAR (ip [1], gint64) = l1 + l2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_ADD_OVF_UN_I4)
-                       if (CHECK_ADD_OVERFLOW_UN (sp [-2].data.i, sp [-1].data.i))
+               }
+               MINT_IN_CASE(MINT_ADD_OVF_UN_I4) {
+                       guint32 i1 = LOCAL_VAR (ip [2], guint32);
+                       guint32 i2 = LOCAL_VAR (ip [3], guint32);
+                       if (CHECK_ADD_OVERFLOW_UN (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(i, +, guint32);
+                       LOCAL_VAR (ip [1], guint32) = i1 + i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_ADD_OVF_UN_I8)
-                       if (CHECK_ADD_OVERFLOW64_UN (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_ADD_OVF_UN_I8) {
+                       guint64 l1 = LOCAL_VAR (ip [2], guint64);
+                       guint64 l2 = LOCAL_VAR (ip [3], guint64);
+                       if (CHECK_ADD_OVERFLOW64_UN (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(l, +, guint64);
+                       LOCAL_VAR (ip [1], guint64) = l1 + l2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MUL_OVF_I4)
-                       if (CHECK_MUL_OVERFLOW (sp [-2].data.i, sp [-1].data.i))
+               }
+               MINT_IN_CASE(MINT_MUL_OVF_I4) {
+                       gint32 i1 = LOCAL_VAR (ip [2], gint32);
+                       gint32 i2 = LOCAL_VAR (ip [3], gint32);
+                       if (CHECK_MUL_OVERFLOW (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(i, *);
+                       LOCAL_VAR (ip [1], gint32) = i1 * i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MUL_OVF_I8)
-                       if (CHECK_MUL_OVERFLOW64 (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_MUL_OVF_I8) {
+                       gint64 l1 = LOCAL_VAR (ip [2], gint64);
+                       gint64 l2 = LOCAL_VAR (ip [3], gint64);
+                       if (CHECK_MUL_OVERFLOW64 (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(l, *);
+                       LOCAL_VAR (ip [1], gint64) = l1 * l2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MUL_OVF_UN_I4)
-                       if (CHECK_MUL_OVERFLOW_UN (sp [-2].data.i, sp [-1].data.i))
+               }
+               MINT_IN_CASE(MINT_MUL_OVF_UN_I4) {
+                       guint32 i1 = LOCAL_VAR (ip [2], guint32);
+                       guint32 i2 = LOCAL_VAR (ip [3], guint32);
+                       if (CHECK_MUL_OVERFLOW_UN (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(i, *, guint32);
+                       LOCAL_VAR (ip [1], guint32) = i1 * i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MUL_OVF_UN_I8)
-                       if (CHECK_MUL_OVERFLOW64_UN (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_MUL_OVF_UN_I8) {
+                       guint64 l1 = LOCAL_VAR (ip [2], guint64);
+                       guint64 l2 = LOCAL_VAR (ip [3], guint64);
+                       if (CHECK_MUL_OVERFLOW64_UN (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(l, *, guint64);
+                       LOCAL_VAR (ip [1], guint64) = l1 * l2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_SUB_OVF_I4)
-                       if (CHECK_SUB_OVERFLOW (sp [-2].data.i, sp [-1].data.i))
+               }
+               MINT_IN_CASE(MINT_SUB_OVF_I4) {
+                       gint32 i1 = LOCAL_VAR (ip [2], gint32);
+                       gint32 i2 = LOCAL_VAR (ip [3], gint32);
+                       if (CHECK_SUB_OVERFLOW (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(i, -);
+                       LOCAL_VAR (ip [1], gint32) = i1 - i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_SUB_OVF_I8)
-                       if (CHECK_SUB_OVERFLOW64 (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_SUB_OVF_I8) {
+                       gint64 l1 = LOCAL_VAR (ip [2], gint64);
+                       gint64 l2 = LOCAL_VAR (ip [3], gint64);
+                       if (CHECK_SUB_OVERFLOW64 (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP(l, -);
+                       LOCAL_VAR (ip [1], gint64) = l1 - l2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_SUB_OVF_UN_I4)
-                       if (CHECK_SUB_OVERFLOW_UN (sp [-2].data.i, sp [-1].data.i))
+               }
+               MINT_IN_CASE(MINT_SUB_OVF_UN_I4) {
+                       guint32 i1 = LOCAL_VAR (ip [2], guint32);
+                       guint32 i2 = LOCAL_VAR (ip [3], guint32);
+                       if (CHECK_SUB_OVERFLOW_UN (i1, i2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(i, -, guint32);
+                       LOCAL_VAR (ip [1], guint32) = i1 - i2;
+                       ip += 4;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_SUB_OVF_UN_I8)
-                       if (CHECK_SUB_OVERFLOW64_UN (sp [-2].data.l, sp [-1].data.l))
+               }
+               MINT_IN_CASE(MINT_SUB_OVF_UN_I8) {
+                       guint64 l1 = LOCAL_VAR (ip [2], guint64);
+                       guint64 l2 = LOCAL_VAR (ip [3], guint64);
+                       if (CHECK_SUB_OVERFLOW64_UN (l1, l2))
                                THROW_EX (mono_get_exception_overflow (), ip);
-                       BINOP_CAST(l, -, guint64);
+                       LOCAL_VAR (ip [1], guint64) = l1 - l2; /* unsigned store, matching the guint64 operands and the ADD/MUL _OVF_UN_I8 cases */
+                       ip += 4;
                        MINT_IN_BREAK;
+               }
                MINT_IN_CASE(MINT_START_ABORT_PROT)
                        mono_threads_begin_abort_protected_block ();
                        ip ++;
@@ -6310,9 +6252,6 @@ call:
                        mono_threads_end_abort_protected_block ();
                        guint16 clause_index = *(ip + 1);
 
-                       // endfinally empties the stack
-                       sp = (stackval*)(locals + frame->imethod->total_locals_size);
-
                        guint16 *ret_ip = *(guint16**)(locals + frame->imethod->clause_data_offsets [clause_index]);
                        if (!ret_ip) {
                                // this clause was called from EH, return to eh
@@ -6339,9 +6278,6 @@ call:
                MINT_IN_CASE(MINT_LEAVE_S)
                MINT_IN_CASE(MINT_LEAVE_CHECK)
                MINT_IN_CASE(MINT_LEAVE_S_CHECK) {
-                       // leave empties the stack
-                       sp = (stackval*)(locals + frame->imethod->total_locals_size);
-
                        int opcode = *ip;
                        gboolean const check = opcode == MINT_LEAVE_CHECK || opcode == MINT_LEAVE_S_CHECK;
 
@@ -6372,36 +6308,28 @@ call:
                MINT_IN_CASE(MINT_ICALL_PPPPP_P)
                MINT_IN_CASE(MINT_ICALL_PPPPPP_V)
                MINT_IN_CASE(MINT_ICALL_PPPPPP_P)
-                       frame->state.ip = ip + 2;
-                       sp = do_icall_wrapper (frame, NULL, *ip, sp, frame->imethod->data_items [ip [1]], FALSE);
+                       frame->state.ip = ip + 3;
+                       do_icall_wrapper (frame, NULL, *ip, (stackval*)(locals + ip [1]), frame->imethod->data_items [ip [2]], FALSE);
                        EXCEPTION_CHECKPOINT_GC_UNSAFE;
                        CHECK_RESUME_STATE (context);
-                       ip += 2;
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MONO_LDPTR) 
-                       sp->data.p = frame->imethod->data_items [ip [1]];
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gpointer) = frame->imethod->data_items [ip [2]];
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MONO_NEWOBJ)
-                       sp->data.o = mono_interp_new (frame->imethod->domain, (MonoClass*)frame->imethod->data_items [ip [1]]); // FIXME: do not swallow the error
-                       ip += 2;
-                       sp++;
+                       LOCAL_VAR (ip [1], MonoObject*) = mono_interp_new (frame->imethod->domain, (MonoClass*)frame->imethod->data_items [ip [2]]); // FIXME: do not swallow the error
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MONO_RETOBJ)
-                       ++ip;
-                       sp--;
-                       g_assert_not_reached ();
-                       stackval_from_data (mono_method_signature_internal (frame->imethod->method)->ret, frame->retval, sp->data.p,
+                       stackval_from_data (mono_method_signature_internal (frame->imethod->method)->ret, frame->stack, LOCAL_VAR (ip [1], gpointer),
                             mono_method_signature_internal (frame->imethod->method)->pinvoke);
-                       if (sp > frame->stack)
-                               g_warning_d ("retobj: more values on stack: %d", sp - frame->stack);
                        frame_data_allocator_pop (&context->data_stack, frame);
                        goto exit_frame;
                MINT_IN_CASE(MINT_MONO_SGEN_THREAD_INFO)
-                       sp->data.p = mono_tls_get_sgen_thread_info ();
-                       sp++;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = mono_tls_get_sgen_thread_info ();
+                       ip += 2;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MONO_MEMORY_BARRIER) {
                        ++ip;
@@ -6409,33 +6337,32 @@ call:
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_MONO_EXCHANGE_I8) {
-                       sp--;
                        gboolean flag = FALSE;
+                       gint64 *dest = LOCAL_VAR (ip [2], gint64*);
+                       gint64 exch = LOCAL_VAR (ip [3], gint64);
 #if SIZEOF_VOID_P == 4
-                       if (G_UNLIKELY ((size_t) ((gint64*) sp [-1].data.p) & 0x7)) {
+                       if (G_UNLIKELY (((size_t)dest) & 0x7)) {
                                gint64 result;
                                mono_interlocked_lock ();
-                               result = *((gint64*) sp [-1].data.p);
-                               *((gint64*) sp [-1].data.p) = sp [0].data.l;
+                               result = *dest;
+                               *dest = exch;
                                mono_interlocked_unlock ();
-                               sp [-1].data.l = result;
+                               LOCAL_VAR (ip [1], gint64) = result;
                                flag = TRUE;
                        }
 #endif
                        if (!flag)
-                               sp [-1].data.l = mono_atomic_xchg_i64 ((gint64*) sp [-1].data.p, sp [0].data.l);
-                       ++ip;
+                               LOCAL_VAR (ip [1], gint64) = mono_atomic_xchg_i64 (dest, exch);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_MONO_LDDOMAIN)
-                       sp->data.p = mono_domain_get ();
-                       ++sp;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = mono_domain_get ();
+                       ip += 2;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_MONO_GET_SP)
-                       sp->data.p = frame;
-                       ++sp;
-                       ++ip;
+                       LOCAL_VAR (ip [1], gpointer) = frame;
+                       ip += 2;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SDB_INTR_LOC)
                        if (G_UNLIKELY (ss_enabled)) {
@@ -6491,137 +6418,136 @@ call:
                        MINT_IN_BREAK;
                }
 
-#define RELOP(datamem, op) \
-       --sp; \
-       sp [-1].data.i = sp [-1].data.datamem op sp [0].data.datamem; \
-       ++ip;
+#define RELOP(datatype, op) \
+       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], datatype) op LOCAL_VAR (ip [3], datatype); \
+       ip += 4;
 
-#define RELOP_FP(datamem, op, noorder) \
-       --sp; \
-       if (mono_isunordered (sp [-1].data.datamem, sp [0].data.datamem)) \
-               sp [-1].data.i = noorder; \
+#define RELOP_FP(datatype, op, noorder) do { \
+       datatype a1 = LOCAL_VAR (ip [2], datatype); \
+       datatype a2 = LOCAL_VAR (ip [3], datatype); \
+       if (mono_isunordered (a1, a2)) \
+               LOCAL_VAR (ip [1], gint32) = noorder; \
        else \
-               sp [-1].data.i = sp [-1].data.datamem op sp [0].data.datamem; \
-       ++ip;
+               LOCAL_VAR (ip [1], gint32) = a1 op a2; \
+       ip += 4; \
+} while (0)
 
                MINT_IN_CASE(MINT_CEQ_I4)
-                       RELOP(i, ==);
+                       RELOP(gint32, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CEQ0_I4)
-                       sp [-1].data.i = (sp [-1].data.i == 0);
-                       ++ip;
+                       LOCAL_VAR (ip [1], gint32) = (LOCAL_VAR (ip [2], gint32) == 0);
+                       ip += 3;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CEQ_I8)
-                       RELOP(l, ==);
+                       RELOP(gint64, ==);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CEQ_R4)
-                       RELOP_FP(f_r4, ==, 0);
+                       RELOP_FP(float, ==, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CEQ_R8)
-                       RELOP_FP(f, ==, 0);
+                       RELOP_FP(double, ==, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CNE_I4)
-                       RELOP(i, !=);
+                       RELOP(gint32, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CNE_I8)
-                       RELOP(l, !=);
+                       RELOP(gint64, !=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CNE_R4)
-                       RELOP_FP(f_r4, !=, 1);
+                       RELOP_FP(float, !=, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CNE_R8)
-                       RELOP_FP(f, !=, 1);
+                       RELOP_FP(double, !=, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_I4)
-                       RELOP(i, >);
+                       RELOP(gint32, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_I8)
-                       RELOP(l, >);
+                       RELOP(gint64, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_R4)
-                       RELOP_FP(f_r4, >, 0);
+                       RELOP_FP(float, >, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_R8)
-                       RELOP_FP(f, >, 0);
+                       RELOP_FP(double, >, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGE_I4)
-                       RELOP(i, >=);
+                       RELOP(gint32, >=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGE_I8)
-                       RELOP(l, >=);
+                       RELOP(gint64, >=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGE_R4)
-                       RELOP_FP(f_r4, >=, 0);
+                       RELOP_FP(float, >=, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGE_R8)
-                       RELOP_FP(f, >=, 0);
+                       RELOP_FP(double, >=, 0);
                        MINT_IN_BREAK;
 
-#define RELOP_CAST(datamem, op, type) \
-       --sp; \
-       sp [-1].data.i = (type)sp [-1].data.datamem op (type)sp [0].data.datamem; \
-       ++ip;
+#define RELOP_CAST(datatype, op) /* same expansion as RELOP; callers pass an unsigned datatype to get the unsigned compare */ \
+       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], datatype) op LOCAL_VAR (ip [3], datatype); \
+       ip += 4;
 
                MINT_IN_CASE(MINT_CGE_UN_I4)
-                       RELOP_CAST(l, >=, guint32);
+                       RELOP_CAST(guint32, >=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGE_UN_I8)
-                       RELOP_CAST(l, >=, guint64);
+                       RELOP_CAST(guint64, >=);
                        MINT_IN_BREAK;
-
                MINT_IN_CASE(MINT_CGT_UN_I4)
-                       RELOP_CAST(i, >, guint32);
+                       RELOP_CAST(guint32, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_UN_I8)
-                       RELOP_CAST(l, >, guint64);
+                       RELOP_CAST(guint64, >);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_UN_R4)
-                       RELOP_FP(f_r4, >, 1);
+                       RELOP_FP(float, >, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CGT_UN_R8)
-                       RELOP_FP(f, >, 1);
+                       RELOP_FP(double, >, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_I4)
-                       RELOP(i, <);
+                       RELOP(gint32, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_I8)
-                       RELOP(l, <);
+                       RELOP(gint64, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_R4)
-                       RELOP_FP(f_r4, <, 0);
+                       RELOP_FP(float, <, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_R8)
-                       RELOP_FP(f, <, 0);
+                       RELOP_FP(double, <, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_UN_I4)
-                       RELOP_CAST(i, <, guint32);
+                       RELOP_CAST(guint32, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_UN_I8)
-                       RELOP_CAST(l, <, guint64);
+                       RELOP_CAST(guint64, <);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_UN_R4)
-                       RELOP_FP(f_r4, <, 1);
+                       RELOP_FP(float, <, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLT_UN_R8)
-                       RELOP_FP(f, <, 1);
+                       RELOP_FP(double, <, 1);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_I4)
-                       RELOP(i, <=);
+                       RELOP(gint32, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_I8)
-                       RELOP(l, <=);
+                       RELOP(gint64, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_UN_I4)
-                       RELOP_CAST(l, <=, guint32);
+                       RELOP_CAST(guint32, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_UN_I8)
-                       RELOP_CAST(l, <=, guint64);
+                       RELOP_CAST(guint64, <=);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_R4)
-                       RELOP_FP(f_r4, <=, 0);
+                       RELOP_FP(float, <=, 0);
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_CLE_R8)
-                       RELOP_FP(f, <=, 0);
+                       RELOP_FP(double, <=, 0);
                        MINT_IN_BREAK;
 
 #undef RELOP
@@ -6629,27 +6555,25 @@ call:
 #undef RELOP_CAST
 
                MINT_IN_CASE(MINT_LDFTN) {
-                       sp->data.p = frame->imethod->data_items [ip [1]];
-                       ++sp;
-                       ip += 2;
+                       LOCAL_VAR (ip [1], gpointer) = frame->imethod->data_items [ip [2]];
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDVIRTFTN) {
-                       InterpMethod *m = (InterpMethod*)frame->imethod->data_items [ip [1]];
-                       --sp;
-                       NULL_CHECK (sp->data.p);
+                       InterpMethod *m = (InterpMethod*)frame->imethod->data_items [ip [3]];
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
+                       NULL_CHECK (o);
                                
-                       sp->data.p = get_virtual_method (m, sp->data.o->vtable);
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gpointer) = get_virtual_method (m, o->vtable);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_LDFTN_DYNAMIC) {
                        error_init_reuse (error);
-                       InterpMethod *m = mono_interp_get_imethod (mono_domain_get (), (MonoMethod*) sp [-1].data.p, error);
+                       InterpMethod *m = mono_interp_get_imethod (mono_domain_get (), LOCAL_VAR (ip [2], MonoMethod*), error);
                        mono_error_assert_ok (error);
-                       sp [-1].data.p = m;
-                       ip++;
+                       LOCAL_VAR (ip [1], gpointer) = m;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_PROF_ENTER) {
@@ -6672,24 +6596,15 @@ call:
                        MINT_IN_BREAK;
                }
 
-               MINT_IN_CASE(MINT_PROF_EXIT)
-               MINT_IN_CASE(MINT_PROF_EXIT_VOID) {
-                       guint16 flag = ip [1];
+               MINT_IN_CASE(MINT_PROF_EXIT) {
+                       guint16 flag = ip [2];
                        // Set retval
-                       int const i32 = READ32 (ip + 2);
+                       int i32 = READ32 (ip + 3);
                        if (i32 == -1) {
                        } else if (i32) {
-                               sp = STACK_SUB_BYTES (sp, i32);
-                               if (frame->parent) {
-                                       memmove (frame->parent->state.sp, sp, i32);
-                                       frame->parent->state.sp = STACK_ADD_BYTES (frame->parent->state.sp, i32);
-                               }
+                               memmove (frame->stack, locals + ip [1], i32);
                        } else {
-                               sp--;
-                               if (frame->parent) {
-                                       frame->parent->state.sp [0] = *sp;
-                                       frame->parent->state.sp++;
-                               }
+                               frame->stack [0] = LOCAL_VAR (ip [1], stackval);
                        }
 
                        if ((flag & TRACING_FLAG) || ((flag & PROFILING_FLAG) && MONO_PROFILER_ENABLED (method_leave) &&
@@ -6698,7 +6613,7 @@ call:
                                prof_ctx->interp_frame = frame;
                                prof_ctx->method = frame->imethod->method;
                                if (i32 != -1)
-                                       prof_ctx->return_value = sp;
+                                       prof_ctx->return_value = frame->stack;
                                if (flag & TRACING_FLAG)
                                        mono_trace_leave_method (frame->imethod->method, frame->imethod->jinfo, prof_ctx);
                                if (flag & PROFILING_FLAG)
@@ -6708,7 +6623,6 @@ call:
                                MONO_PROFILER_RAISE (method_leave, (frame->imethod->method, NULL));
                        }
 
-                       ip += 4;
                        frame_data_allocator_pop (&context->data_stack, frame);
                        goto exit_frame;
                }
@@ -6720,143 +6634,76 @@ call:
                        MINT_IN_BREAK;
                }
 
-#define LDLOC(datamem, argtype) \
-       sp->data.datamem = * (argtype *)(locals + ip [1]); \
-       ip += 2; \
-       ++sp; 
-       
-               MINT_IN_CASE(MINT_LDLOC_I1) LDLOC(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_U1) LDLOC(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_I2) LDLOC(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_U2) LDLOC(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_I4) LDLOC(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_I8) LDLOC(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_R4) LDLOC(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_R8) LDLOC(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDLOC_O) LDLOC(p, gpointer); MINT_IN_BREAK;
-
-               MINT_IN_CASE(MINT_LDLOC_VT) {
-                       int const i32 = READ32 (ip + 2);
-                       memcpy (sp, locals + ip [1], i32);
-                       sp = STACK_ADD_BYTES (sp, i32);
-                       ip += 4;
-                       MINT_IN_BREAK;
-               }
                MINT_IN_CASE(MINT_LDLOCA_S)
-                       sp->data.p = locals + ip [1];
-                       ip += 2;
-                       ++sp;
+                       LOCAL_VAR (ip [1], gpointer) = locals + ip [2];
+                       ip += 3;
                        MINT_IN_BREAK;
 
-#define STLOC(datamem, argtype) \
-       --sp; \
-       * (argtype *)(locals + ip [1]) = sp->data.datamem; \
-       ip += 2;
-       
-               MINT_IN_CASE(MINT_STLOC_I1) STLOC(i, gint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_U1) STLOC(i, guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_I2) STLOC(i, gint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_U2) STLOC(i, guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_I4) STLOC(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_I8) STLOC(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_R4) STLOC(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_R8) STLOC(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_O) STLOC(p, gpointer); MINT_IN_BREAK;
-
-#define STLOC_NP(datamem, argtype) \
-       * (argtype *)(locals + ip [1]) = sp [-1].data.datamem; \
-       ip += 2;
-
-               MINT_IN_CASE(MINT_STLOC_NP_I4) STLOC_NP(i, gint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_NP_I8) STLOC_NP(l, gint64); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_NP_R4) STLOC_NP(f_r4, float); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_NP_R8) STLOC_NP(f, double); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_STLOC_NP_O) STLOC_NP(p, gpointer); MINT_IN_BREAK;
-
-               MINT_IN_CASE(MINT_STLOC_VT) {
-                       int const i32 = READ32 (ip + 2);
-                       sp = STACK_SUB_BYTES (sp, i32);
-                       memcpy (locals + ip [1], sp, i32);
-                       ip += 4;
-                       MINT_IN_BREAK;
-               }
 
-#define MOVLOC(argtype) \
-       * (argtype *)(locals + ip [2]) = * (argtype *)(locals + ip [1]); \
+#define MOV(argtype1,argtype2) \
+       LOCAL_VAR (ip [1], argtype1) = LOCAL_VAR (ip [2], argtype2); \
        ip += 3;
-
-               MINT_IN_CASE(MINT_MOVLOC_1) MOVLOC(guint8); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MOVLOC_2) MOVLOC(guint16); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MOVLOC_4) MOVLOC(guint32); MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_MOVLOC_8) MOVLOC(guint64); MINT_IN_BREAK;
-
-               MINT_IN_CASE(MINT_MOVLOC_VT) {
-                       int const i32 = READ32(ip + 3);
-                       memcpy (locals + ip [2], locals + ip [1], i32);
-                       ip += 5;
+               // When loading from a local, we might need to sign / zero extend to 4 bytes,
+               // which is our minimum "register" size in interp. These extending moves are
+               // only needed when the address of the local is taken, and we should try to
+               // optimize them out because the local can't be propagated.
+               MINT_IN_CASE(MINT_MOV_I1) MOV(guint32, gint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_MOV_U1) MOV(guint32, guint8); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_MOV_I2) MOV(guint32, gint16); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_MOV_U2) MOV(guint32, guint16); MINT_IN_BREAK;
+               // Normal moves between locals
+               MINT_IN_CASE(MINT_MOV_4) MOV(guint32, guint32); MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_MOV_8) MOV(guint64, guint64); MINT_IN_BREAK;
+
+               MINT_IN_CASE(MINT_MOV_VT) {
+                       guint16 size = ip [3];
+                       memmove (locals + ip [1], locals + ip [2], size);
+                       ip += 4;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_LOCALLOC) {
-                       stackval *sp_start = (stackval*)(locals + frame->imethod->total_locals_size);
-                       if (sp != sp_start + 1) /*FIX?*/
-                               THROW_EX (mono_get_exception_execution_engine (NULL), ip);
-
-                       int len = sp [-1].data.i;
-                       // FIXME we need a separate allocator for localloc sections
-                       sp [-1].data.p = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
+                       int len = LOCAL_VAR (ip [2], gint32);
+                       gpointer mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
 
                        if (frame->imethod->init_locals)
-                               memset (sp [-1].data.p, 0, len);
-                       ++ip;
+                               memset (mem, 0, len);
+                       LOCAL_VAR (ip [1], gpointer) = mem;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_ENDFILTER)
                        /* top of stack is result of filter */
-                       frame->retval->data.i = sp [-1].data.i;
+                       frame->retval->data.i = LOCAL_VAR (ip [1], gint32);
                        goto exit_clause;
                MINT_IN_CASE(MINT_INITOBJ)
-                       --sp;
-                       memset (sp->data.vt, 0, READ32(ip + 1));
+                       memset (LOCAL_VAR (ip [1], gpointer), 0, ip [2]);
                        ip += 3;
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_CPBLK)
-                       sp -= 3;
-                       if (!sp [0].data.p || !sp [1].data.p)
-                               THROW_EX (mono_get_exception_null_reference(), ip - 1);
-                       ++ip;
+               MINT_IN_CASE(MINT_CPBLK) {
+                       gpointer dest = LOCAL_VAR (ip [1], gpointer);
+                       gpointer src = LOCAL_VAR (ip [2], gpointer);
+                       if (!dest || !src)
+                               THROW_EX (mono_get_exception_null_reference(), ip);
                        /* FIXME: value and size may be int64... */
-                       memcpy (sp [0].data.p, sp [1].data.p, sp [2].data.i);
-                       MINT_IN_BREAK;
-#if 0
-               MINT_IN_CASE(MINT_CONSTRAINED_) {
-                       guint32 token;
-                       /* FIXME: implement */
-                       ++ip;
-                       token = READ32 (ip);
-                       ip += 2;
+                       memcpy (dest, src, LOCAL_VAR (ip [3], gint32));
+                       ip += 4;
                        MINT_IN_BREAK;
                }
-#endif
-               MINT_IN_CASE(MINT_INITBLK)
-                       sp -= 3;
-                       NULL_CHECK (sp [0].data.p);
-                       ++ip;
+               MINT_IN_CASE(MINT_INITBLK) {
+                       gpointer dest = LOCAL_VAR (ip [1], gpointer);
+                       NULL_CHECK (dest);
                        /* FIXME: value and size may be int64... */
-                       memset (sp [0].data.p, sp [1].data.i, sp [2].data.i);
-                       MINT_IN_BREAK;
-#if 0
-               MINT_IN_CASE(MINT_NO_)
-                       /* FIXME: implement */
-                       ip += 2;
+                       memset (dest, LOCAL_VAR (ip [2], gint32), LOCAL_VAR (ip [3], gint32));
+                       ip += 4;
                        MINT_IN_BREAK;
-#endif
-          MINT_IN_CASE(MINT_RETHROW) {
+               }
+               MINT_IN_CASE(MINT_RETHROW) {
                        int exvar_offset = ip [1];
                        THROW_EX_GENERAL (*(MonoException**)(frame_locals (frame) + exvar_offset), ip, TRUE);
                        MINT_IN_BREAK;
-          }
-          MINT_IN_CASE(MINT_MONO_RETHROW) {
+               }
+               MINT_IN_CASE(MINT_MONO_RETHROW) {
                        /* 
                         * need to clarify what this should actually do:
                         *
@@ -6865,40 +6712,35 @@ call:
                         * use CEE_THROW and lose the exception stacktrace. 
                         */
 
-                       --sp;
-                       if (!sp->data.p)
-                               sp->data.p = mono_get_exception_null_reference ();
-
-                       THROW_EX_GENERAL ((MonoException *)sp->data.p, ip, TRUE);
-                       MINT_IN_BREAK;
-          }
-          MINT_IN_CASE(MINT_LD_DELEGATE_METHOD_PTR) {
-                  MonoDelegate *del;
-
-                  --sp;
-                  del = (MonoDelegate*)sp->data.p;
-                  if (!del->interp_method) {
-                          /* Not created from interpreted code */
-                          error_init_reuse (error);
-                          g_assert (del->method);
-                          del->interp_method = mono_interp_get_imethod (del->object.vtable->domain, del->method, error);
-                          mono_error_assert_ok (error);
-                  }
-                  g_assert (del->interp_method);
-                  sp->data.p = del->interp_method;
-                  ++sp;
-                  ip += 1;
-                  MINT_IN_BREAK;
-          }
+                       MonoException *exc = LOCAL_VAR (ip [1], MonoException*);
+                       if (!exc)
+                               exc = mono_get_exception_null_reference ();
+
+                       THROW_EX_GENERAL (exc, ip, TRUE);
+                       MINT_IN_BREAK;
+               }
+               MINT_IN_CASE(MINT_LD_DELEGATE_METHOD_PTR) {
+                       MonoDelegate *del = LOCAL_VAR (ip [2], MonoDelegate*);
+                       if (!del->interp_method) {
+                               /* Not created from interpreted code */
+                               error_init_reuse (error);
+                               g_assert (del->method);
+                               del->interp_method = mono_interp_get_imethod (del->object.vtable->domain, del->method, error);
+                               mono_error_assert_ok (error);
+                       }
+                       g_assert (del->interp_method);
+                       LOCAL_VAR (ip [1], gpointer) = del->interp_method;
+                       ip += 3;
+                       MINT_IN_BREAK;
+               }
 
 #define MATH_UNOP(mathfunc) \
-       sp [-1].data.f = mathfunc (sp [-1].data.f); \
-       ++ip;
+       LOCAL_VAR (ip [1], double) = mathfunc (LOCAL_VAR (ip [2], double)); \
+       ip += 3;
 
 #define MATH_BINOP(mathfunc) \
-       sp--; \
-       sp [-1].data.f = mathfunc (sp [-1].data.f, sp [0].data.f); \
-       ++ip;
+       LOCAL_VAR (ip [1], double) = mathfunc (LOCAL_VAR (ip [2], double), LOCAL_VAR (ip [3], double)); \
+       ip += 4;
 
                MINT_IN_CASE(MINT_ABS) MATH_UNOP(fabs); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ASIN) MATH_UNOP(asin); MINT_IN_BREAK;
@@ -6925,37 +6767,34 @@ call:
                MINT_IN_CASE(MINT_ATAN2) MATH_BINOP(atan2); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_POW) MATH_BINOP(pow); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_FMA)
-                       sp -= 2;
-                       sp [-1].data.f = fma (sp [-1].data.f, sp [0].data.f, sp [1].data.f);
-                       ip++;
+                       LOCAL_VAR (ip [1], double) = fma (LOCAL_VAR (ip [2], double), LOCAL_VAR (ip [3], double), LOCAL_VAR (ip [4], double));
+                       ip += 5;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SCALEB)
-                       sp--;
-                       sp [-1].data.f = scalbn (sp [-1].data.f, sp [0].data.i);
-                       ip++;
+                       LOCAL_VAR (ip [1], double) = scalbn (LOCAL_VAR (ip [2], double), LOCAL_VAR (ip [3], gint32));
+                       ip += 4;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ILOGB) {
                        int result;
-                       double x = sp [-1].data.f;
+                       double x = LOCAL_VAR (ip [2], double);
                        if (FP_ILOGB0 != INT_MIN && x == 0.0)
                                result = INT_MIN;
                        else if (FP_ILOGBNAN != INT_MAX && isnan(x))
                                result = INT_MAX;
                        else
                                result = ilogb (x);
-                       sp [-1].data.i = result;
-                       ip++;
+                       LOCAL_VAR (ip [1],  gint32) = result;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
 
 #define MATH_UNOPF(mathfunc) \
-       sp [-1].data.f_r4 = mathfunc (sp [-1].data.f_r4); \
-       ++ip;
+       LOCAL_VAR (ip [1], float) = mathfunc (LOCAL_VAR (ip [2], float)); \
+       ip += 3;
 
 #define MATH_BINOPF(mathfunc) \
-       sp--; \
-       sp [-1].data.f_r4 = mathfunc (sp [-1].data.f_r4, sp [0].data.f_r4); \
-       ++ip;
+       LOCAL_VAR (ip [1], float) = mathfunc (LOCAL_VAR (ip [2], float), LOCAL_VAR (ip [3], float)); \
+       ip += 4;
                MINT_IN_CASE(MINT_ABSF) MATH_UNOPF(fabsf); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ASINF) MATH_UNOPF(asinf); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ASINHF) MATH_UNOPF(asinhf); MINT_IN_BREAK;
@@ -6981,45 +6820,43 @@ call:
                MINT_IN_CASE(MINT_ATAN2F) MATH_BINOPF(atan2f); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_POWF) MATH_BINOPF(powf); MINT_IN_BREAK;
                MINT_IN_CASE(MINT_FMAF)
-                       sp -= 2;
-                       sp [-1].data.f_r4 = fmaf (sp [-1].data.f_r4, sp [0].data.f_r4, sp [1].data.f_r4);
-                       ip++;
+                       LOCAL_VAR (ip [1], float) = fmaf (LOCAL_VAR (ip [2], float), LOCAL_VAR (ip [3], float), LOCAL_VAR (ip [4], float));
+                       ip += 5;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SCALEBF)
-                       sp--;
-                       sp [-1].data.f_r4 = scalbnf (sp [-1].data.f_r4, sp [0].data.i);
-                       ip++;
+                       LOCAL_VAR (ip [1], float) = scalbnf (LOCAL_VAR (ip [2], float), LOCAL_VAR (ip [3], gint32));
+                       ip += 4;
                        MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ILOGBF) {
                        int result;
-                       float x = sp [-1].data.f_r4;
+                       float x = LOCAL_VAR (ip [2], float);
                        if (FP_ILOGB0 != INT_MIN && x == 0.0)
                                result = INT_MIN;
                        else if (FP_ILOGBNAN != INT_MAX && isnan(x))
                                result = INT_MAX;
                        else
                                result = ilogbf (x);
-                       sp [-1].data.i = result;
-                       ip++;
+                       LOCAL_VAR (ip [1], gint32) = result;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
 
                MINT_IN_CASE(MINT_INTRINS_ENUM_HASFLAG) {
-                       MonoClass *klass = (MonoClass*)frame->imethod->data_items[ip [1]];
-                       mono_interp_enum_hasflag (sp, klass);
-                       sp--;
-                       ip += 2;
+                       MonoClass *klass = (MonoClass*)frame->imethod->data_items [ip [4]];
+                       LOCAL_VAR (ip [1], gint32) = mono_interp_enum_hasflag ((stackval*)(locals + ip [2]), (stackval*)(locals + ip [3]), klass);
+                       ip += 5;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_GET_HASHCODE) {
-                       sp [-1].data.i = mono_object_hash_internal (sp [-1].data.o);
-                       ip++;
+                       LOCAL_VAR (ip [1], gint32) = mono_object_hash_internal (LOCAL_VAR (ip [2], MonoObject*));
+                       ip += 3;
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_GET_TYPE) {
-                       NULL_CHECK (sp [-1].data.p);
-                       sp [-1].data.o = (MonoObject*) sp [-1].data.o->vtable->type;
-                       ip++;
+                       MonoObject *o = LOCAL_VAR (ip [2], MonoObject*);
+                       NULL_CHECK (o);
+                       LOCAL_VAR (ip [1], MonoObject*) = (MonoObject*) o->vtable->type;
+                       ip += 3;
                        MINT_IN_BREAK;
                }
 
@@ -7049,10 +6886,9 @@ resume:
                        ip = context->handler_ip;
                        /* spec says stack should be empty at endfinally so it should be at the start too */
                        locals = (guchar*)frame->stack;
-                       sp = (stackval*)(locals + frame->imethod->total_locals_size);
                        g_assert (context->exc_gchandle);
-                       sp->data.p = mono_gchandle_get_target_internal (context->exc_gchandle);
-                       ++sp;
+                       // Write the exception onto the first slot of the execution stack
+                       LOCAL_VAR (frame->imethod->total_locals_size, MonoObject*) = mono_gchandle_get_target_internal (context->exc_gchandle);
 
                        clear_resume_state (context);
                        // goto main_loop instead of MINT_IN_DISPATCH helps the compiler and therefore conserves stack.
index 4d50657..86cb256 100644 (file)
@@ -35,28 +35,16 @@ unsigned char const mono_interp_oplen [] = {
 };
 #undef OPDEF
 
-#define Push0 0
-#define Push1 1
-#define Push2 2
-#define Pop0 0
-#define Pop1 1
-#define Pop2 2
-#define Pop3 3
-#define Pop4 4
-#define Pop5 5
-#define Pop6 6
-#define PopAll MINT_POP_ALL
-#define VarPush MINT_VAR_PUSH
-#define VarPop MINT_VAR_POP
+#define CallArgs MINT_CALL_ARGS
 
-#define OPDEF(a,b,c,d,e,f) d,
-int const mono_interp_oppop[] = {
+#define OPDEF(a,b,c,d,e,f) e,
+int const mono_interp_op_sregs[] = {
 #include "mintops.def"
 };
 #undef OPDEF
 
-#define OPDEF(a,b,c,d,e,f) e,
-int const mono_interp_oppush[] = {
+#define OPDEF(a,b,c,d,e,f) d,
+int const mono_interp_op_dregs[] = {
 #include "mintops.def"
 };
 #undef OPDEF
@@ -76,111 +64,13 @@ mono_interp_dis_mintop_len (const guint16 *ip)
                g_print ("op %d len %d\n", *ip, len);
                g_assert_not_reached ();
        } else if (len == 0) { /* SWITCH */
-               int n = READ32 (ip + 1);
+               int n = READ32 (ip + 2);
                len = MINT_SWITCH_LEN (n);
        }
 
        return ip + len;
 }
 
-/*
- * ins_offset is the associated offset of this instruction
- * native_offset indicates whether this instruction is part of the compacted
- * instruction stream or is part of an InterpInst
- * ip is the address where the arguments of the instruction are located
- */
-char *
-mono_interp_dis_mintop (gint32 ins_offset, gboolean native_offset, const guint16 *ip, guint16 opcode)
-{
-       GString *str = g_string_new ("");
-       guint32 token;
-       int target;
-
-       if (native_offset)
-               g_string_append_printf (str, "IR_%04x: %-10s", ins_offset, mono_interp_opname (opcode));
-       else
-               g_string_append_printf (str, "IL_%04x: %-10s", ins_offset, mono_interp_opname (opcode));
-
-       switch (mono_interp_opargtype [opcode]) {
-       case MintOpNoArgs:
-               break;
-       case MintOpUShortInt:
-               g_string_append_printf (str, " %u", *(guint16*)ip);
-               break;
-       case MintOpTwoShorts:
-               g_string_append_printf (str, " %u,%u", *(guint16*)ip, *(guint16 *)(ip + 1));
-               break;
-       case MintOpShortAndInt:
-               g_string_append_printf (str, " %u,%u", *(guint16*)ip, (guint32)READ32(ip + 1));
-               break;
-       case MintOpShortInt:
-               g_string_append_printf (str, " %d", *(gint16*)ip);
-               break;
-       case MintOpClassToken:
-       case MintOpMethodToken:
-       case MintOpFieldToken:
-               token = * (guint16 *) ip;
-               g_string_append_printf (str, " %u", token);
-               break;
-       case MintOpInt:
-               g_string_append_printf (str, " %d", (gint32)READ32 (ip));
-               break;
-       case MintOpLongInt:
-               g_string_append_printf (str, " %" PRId64, (gint64)READ64 (ip));
-               break;
-       case MintOpFloat: {
-               gint32 tmp = READ32 (ip);
-               g_string_append_printf (str, " %g", * (float *)&tmp);
-               break;
-       }
-       case MintOpDouble: {
-               gint64 tmp = READ64 (ip);
-               g_string_append_printf (str, " %g", * (double *)&tmp);
-               break;
-       }
-       case MintOpShortBranch:
-               if (native_offset) {
-                       target = ins_offset + *(gint16*)ip;
-                       g_string_append_printf (str, " IR_%04x", target);
-               } else {
-                       /* the target IL is already embedded in the instruction */
-                       g_string_append_printf (str, " IL_%04x", *(gint16*)ip);
-               }
-               break;
-       case MintOpBranch:
-               if (native_offset) {
-                       target = ins_offset + (gint32)READ32 (ip);
-                       g_string_append_printf (str, " IR_%04x", target);
-               } else {
-                       g_string_append_printf (str, " IL_%04x", (gint32)READ32 (ip));
-               }
-               break;
-       case MintOpSwitch: {
-               int sval = (gint32)READ32 (ip);
-               int i;
-               g_string_append_printf (str, "(");
-               gint32 p = 2;
-               for (i = 0; i < sval; ++i) {
-                       if (i > 0)
-                               g_string_append_printf (str, ", ");
-                       if (native_offset) {
-                               int offset = (gint32)READ32 (ip + p);
-                               g_string_append_printf (str, "IR_%04x", ins_offset + 1 + p + offset);
-                       } else {
-                               g_string_append_printf (str, "IL_%04x", (gint32)READ32 (ip + p));
-                       }
-                       p += 2;
-               }
-               g_string_append_printf (str, ")");
-               break;
-       }
-       default:
-               g_string_append_printf (str, "unknown arg type\n");
-       }
-
-       return g_string_free (str, FALSE);
-}
-
 const char*
 mono_interp_opname (int op)
 {
index 3ec3cd4..d94d90e 100644 (file)
  * Authors:
  *   Bernie Solomon (bernard@ugsolutions.com)
  *
+ * OPDEF (opsymbol, opstring, oplength (in uint16s), num_dregs (0 or 1), num_sregs, optype)
+ * optype describes the contents of the instruction, following the dreg/sreg offsets.
  */
 
-/* OPDEF (opsymbol, opstring, oplength (in uint16s), pop_n, push_n, optype) */
-
-OPDEF(MINT_NOP, "nop", 0, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_NIY, "niy", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_BREAK, "break", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_BREAKPOINT, "breakpoint", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_LDNULL, "ldnull", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_DUP, "dup", 1, Pop1, Push2, MintOpNoArgs)
-OPDEF(MINT_DUP_VT, "dup.vt", 3, Pop1, Push2, MintOpInt)
-OPDEF(MINT_POP, "pop", 1, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_POP_VT, "pop.vt", 3, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_POP1, "pop1", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_RET, "ret", 1, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_RET_VOID, "ret.void", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_RET_VT, "ret.vt", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_RET_LOCALLOC, "ret.localloc", 1, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_RET_VOID_LOCALLOC, "ret.void.localloc", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_RET_VT_LOCALLOC, "ret.vt.localloc", 3, Pop1, Push0, MintOpInt)
-
-OPDEF(MINT_LDC_I4_M1, "ldc.i4.m1", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_0, "ldc.i4.0", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_1, "ldc.i4.1", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_2, "ldc.i4.2", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_3, "ldc.i4.3", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_4, "ldc.i4.4", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_5, "ldc.i4.5", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_6, "ldc.i4.6", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_7, "ldc.i4.7", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_LDC_I4_8, "ldc.i4.8", 1, Pop0, Push1, MintOpNoArgs)
-
-OPDEF(MINT_LDC_I4_S, "ldc.i4.s", 2, Pop0, Push1, MintOpShortInt)
-OPDEF(MINT_LDC_I4, "ldc.i4", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDC_I8, "ldc.i8", 5, Pop0, Push1, MintOpLongInt)
-OPDEF(MINT_LDC_I8_S, "ldc.i8.s", 2, Pop0, Push1, MintOpShortInt)
-
-OPDEF(MINT_LDC_R4, "ldc.r4", 3, Pop0, Push1, MintOpFloat)
-OPDEF(MINT_LDC_R8, "ldc.r8", 5, Pop0, Push1, MintOpDouble)
-
-OPDEF(MINT_INIT_ARGLIST, "init_arglist", 3, Pop0, Push0, MintOpNoArgs)
-
-OPDEF(MINT_LDFLD_VT_I1, "ldfld.vt.i1", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_U1, "ldfld.vt.u1", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_I2, "ldfld.vt.i2", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_U2, "ldfld.vt.u2", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_I4, "ldfld.vt.i4", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_I8, "ldfld.vt.i8", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_R4, "ldfld.vt.r4", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_R8, "ldfld.vt.r8", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_O, "ldfld.vt.o", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_VT, "ldfld.vt.vt", 4, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_I8_UNALIGNED, "ldfld.vt.i8.unaligned", 3, Pop1, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDFLD_VT_R8_UNALIGNED, "ldfld.vt.r8.unaligned", 3, Pop1, Push1, MintOpTwoShorts)
-
-OPDEF(MINT_LDFLD_I1, "ldfld.i1", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_U1, "ldfld.u1", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_I2, "ldfld.i2", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_U2, "ldfld.u2", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_I4, "ldfld.i4", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_I8, "ldfld.i8", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_R4, "ldfld.r4", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_R8, "ldfld.r8", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_O, "ldfld.o", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_VT, "ldfld.vt", 4, Pop1, Push1, MintOpShortAndInt)
-OPDEF(MINT_LDFLD_I8_UNALIGNED, "ldfld.i8.unaligned", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLD_R8_UNALIGNED, "ldfld.r8.unaligned", 2, Pop1, Push1, MintOpUShortInt)
-
-OPDEF(MINT_LDRMFLD, "ldrmfld", 2, Pop1, Push1, MintOpFieldToken)
-OPDEF(MINT_LDRMFLD_VT, "ldrmfld.vt", 2, Pop1, Push1, MintOpUShortInt)
-
-OPDEF(MINT_LDFLDA, "ldflda", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDFLDA_UNSAFE, "ldflda.unsafe", 2, Pop1, Push1, MintOpUShortInt)
-
-OPDEF(MINT_LDLOCFLD_I1, "ldlocfld.i1", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_U1, "ldlocfld.u1", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_I2, "ldlocfld.i2", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_U2, "ldlocfld.u2", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_I4, "ldlocfld.i4", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_I8, "ldlocfld.i8", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_R4, "ldlocfld.r4", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_R8, "ldlocfld.r8", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDLOCFLD_O, "ldlocfld.o", 3, Pop0, Push1, MintOpTwoShorts)
-
-OPDEF(MINT_STFLD_I1, "stfld.i1", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_U1, "stfld.u1", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_I2, "stfld.i2", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_U2, "stfld.u2", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_I4, "stfld.i4", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_I8, "stfld.i8", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_R4, "stfld.r4", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_R8, "stfld.r8", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_O, "stfld.o", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_VT, "stfld.vt", 3, Pop2, Push0, MintOpTwoShorts)
-OPDEF(MINT_STFLD_VT_NOREF, "stfld.vt.noref", 3, Pop2, Push0, MintOpTwoShorts)
-OPDEF(MINT_STFLD_I8_UNALIGNED, "stfld.i8.unaligned", 2, Pop2, Push0, MintOpUShortInt)
-OPDEF(MINT_STFLD_R8_UNALIGNED, "stfld.r8.unaligned", 2, Pop2, Push0, MintOpUShortInt)
-
-OPDEF(MINT_STRMFLD, "strmfld", 2, Pop2, Push0, MintOpFieldToken)
-OPDEF(MINT_STRMFLD_VT, "strmfld.vt", 2, Pop2, Push0, MintOpUShortInt)
-
-OPDEF(MINT_STLOCFLD_I1, "stlocfld.i1", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_U1, "stlocfld.u1", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_I2, "stlocfld.i2", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_U2, "stlocfld.u2", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_I4, "stlocfld.i4", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_I8, "stlocfld.i8", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_R4, "stlocfld.r4", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_R8, "stlocfld.r8", 3, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_STLOCFLD_O, "stlocfld.o", 3, Pop1, Push0, MintOpTwoShorts)
-
-OPDEF(MINT_LDTSFLD_I1, "ldtsfld.i1", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_U1, "ldtsfld.u1", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_I2, "ldtsfld.i2", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_U2, "ldtsfld.u2", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_I4, "ldtsfld.i4", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_I8, "ldtsfld.i8", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_R4, "ldtsfld.r4", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_R8, "ldtsfld.r8", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDTSFLD_O, "ldtsfld.o", 3, Pop0, Push1, MintOpInt)
-OPDEF(MINT_LDSSFLD, "ldssfld", 4, Pop0, Push1, MintOpFieldToken)
-OPDEF(MINT_LDSSFLD_VT, "ldssfld.vt", 5, Pop0, Push1, MintOpInt)
-
-OPDEF(MINT_LDSFLD_I1, "ldsfld.i1", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_U1, "ldsfld.u1", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_I2, "ldsfld.i2", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_U2, "ldsfld.u2", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_I4, "ldsfld.i4", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_I8, "ldsfld.i8", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_R4, "ldsfld.r4", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_R8, "ldsfld.r8", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_O, "ldsfld.o", 3, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDSFLD_VT, "ldsfld.vt", 5, Pop0, Push1, MintOpTwoShorts)
-
-OPDEF(MINT_STTSFLD_I1, "sttsfld.i1", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_U1, "sttsfld.u1", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_I2, "sttsfld.i2", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_U2, "sttsfld.u2", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_I4, "sttsfld.i4", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_I8, "sttsfld.i8", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_R4, "sttsfld.r4", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_R8, "sttsfld.r8", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STTSFLD_O, "sttsfld.o", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STSSFLD, "stssfld", 4, Pop1, Push0, MintOpFieldToken)
-OPDEF(MINT_STSSFLD_VT, "stssfld.vt", 5, Pop1, Push0, MintOpInt)
-OPDEF(MINT_STSFLD_I1, "stsfld.i1", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_U1, "stsfld.u1", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_I2, "stsfld.i2", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_U2, "stsfld.u2", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_I4, "stsfld.i4", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_I8, "stsfld.i8", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_R4, "stsfld.r4", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_R8, "stsfld.r8", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_O, "stsfld.o", 3, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STSFLD_VT, "stsfld.vt", 5, Pop1, Push0, MintOpTwoShorts)
-OPDEF(MINT_LDSFLDA, "ldsflda", 3, Pop0, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDSSFLDA, "ldssflda", 3, Pop0, Push1, MintOpInt)
-
-OPDEF(MINT_LDLOC_I1, "ldloc.i1", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_U1, "ldloc.u1", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_I2, "ldloc.i2", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_U2, "ldloc.u2", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_I4, "ldloc.i4", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_I8, "ldloc.i8", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_R4, "ldloc.r4", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_R8, "ldloc.r8", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_O, "ldloc.o", 2, Pop0, Push1, MintOpUShortInt)
-OPDEF(MINT_LDLOC_VT, "ldloc.vt", 4, Pop0, Push1, MintOpShortAndInt)
-
-OPDEF(MINT_STLOC_I1, "stloc.i1", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_U1, "stloc.u1", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_I2, "stloc.i2", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_U2, "stloc.u2", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_I4, "stloc.i4", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_I8, "stloc.i8", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_R4, "stloc.r4", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_R8, "stloc.r8", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_O, "stloc.o", 2, Pop1, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_VT, "stloc.vt", 4, Pop1, Push0, MintOpShortAndInt)
-
-OPDEF(MINT_STLOC_NP_I4, "stloc.np.i4", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_NP_I8, "stloc.np.i8", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_NP_R4, "stloc.np.R4", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_NP_R8, "stloc.np.R8", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_STLOC_NP_O, "stloc.np.o", 2, Pop0, Push0, MintOpUShortInt)
-
-OPDEF(MINT_MOVLOC_1, "movloc.1", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_MOVLOC_2, "movloc.2", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_MOVLOC_4, "movloc.4", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_MOVLOC_8, "movloc.8", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_MOVLOC_VT, "movloc.vt", 5, Pop0, Push0, MintOpTwoShorts)
-
-OPDEF(MINT_LDLOCA_S, "ldloca.s", 2, Pop0, Push1, MintOpUShortInt)
-
-OPDEF(MINT_LDIND_I1_CHECK, "ldind.i1.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_U1_CHECK, "ldind.u1.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_I2_CHECK, "ldind.i2.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_U2_CHECK, "ldind.u2.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_I4_CHECK, "ldind.i4.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_U4_CHECK, "ldind.u4.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_I8_CHECK, "ldind.i8.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_I, "ldind.i", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDIND_I8, "ldind.i8", 2, Pop1, Push1, MintOpUShortInt)
-OPDEF(MINT_LDIND_R4_CHECK, "ldind.r4.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_R8_CHECK, "ldind.r8.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_REF, "ldind.ref", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDIND_REF_CHECK, "ldind.ref.check", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_STIND_I1, "stind.i1", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_I2, "stind.i2", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_I4, "stind.i4", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_I8, "stind.i8", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_I, "stind.i", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_R4, "stind.r4", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_R8, "stind.r8", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_STIND_REF, "stind.ref", 1, Pop2, Push0, MintOpNoArgs)
-
-OPDEF(MINT_BR, "br", 3, Pop0, Push0, MintOpBranch)
-OPDEF(MINT_LEAVE, "leave", 3, PopAll, Push0, MintOpBranch)
-OPDEF(MINT_LEAVE_CHECK, "leave.check", 3, PopAll, Push0, MintOpBranch)
-OPDEF(MINT_BR_S, "br.s", 2, Pop0, Push0, MintOpShortBranch)
-OPDEF(MINT_LEAVE_S, "leave.s", 2, PopAll, Push0, MintOpShortBranch)
-OPDEF(MINT_LEAVE_S_CHECK, "leave.s.check", 2, PopAll, Push0, MintOpShortBranch)
-OPDEF(MINT_CALL_HANDLER, "call_handler", 4, Pop0, Push0, MintOpBranch)
-OPDEF(MINT_CALL_HANDLER_S, "call_handler.s", 3, Pop0, Push0, MintOpShortBranch)
-
-OPDEF(MINT_THROW, "throw", 1, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_RETHROW, "rethrow", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_ENDFINALLY, "endfinally", 2, PopAll, Push0, MintOpShortInt)
-OPDEF(MINT_MONO_RETHROW, "mono_rethrow", 1, Pop1, Push0, MintOpNoArgs)
-
-OPDEF(MINT_CHECKPOINT, "checkpoint", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_SAFEPOINT, "safepoint", 1, Pop0, Push0, MintOpNoArgs)
-
-OPDEF(MINT_BRFALSE_I4, "brfalse.i4", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRFALSE_I8, "brfalse.i8", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRFALSE_R4, "brfalse.r4", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRFALSE_R8, "brfalse.r8", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRTRUE_I4, "brtrue.i4", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRTRUE_I8, "brtrue.i8", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRTRUE_R4, "brtrue.r4", 3, Pop1, Push0, MintOpBranch)
-OPDEF(MINT_BRTRUE_R8, "brtrue.r8", 3, Pop1, Push0, MintOpBranch)
-
-OPDEF(MINT_BRFALSE_I4_S, "brfalse.i4.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRFALSE_I8_S, "brfalse.i8.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRFALSE_R4_S, "brfalse.r4.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRFALSE_R8_S, "brfalse.r8.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRTRUE_I4_S, "brtrue.i4.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRTRUE_I8_S, "brtrue.i8.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRTRUE_R4_S, "brtrue.r4.s", 2, Pop1, Push0, MintOpShortBranch)
-OPDEF(MINT_BRTRUE_R8_S, "brtrue.r8.s", 2, Pop1, Push0, MintOpShortBranch)
-
-OPDEF(MINT_BEQ_I4, "beq.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BEQ_I8, "beq.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BEQ_R4, "beq.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BEQ_R8, "beq.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_I4, "bge.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_I8, "bge.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_R4, "bge.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_R8, "bge.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_I4, "bgt.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_I8, "bgt.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_R4, "bgt.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_R8, "bgt.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_I4, "blt.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_I8, "blt.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_R4, "blt.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_R8, "blt.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_I4, "ble.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_I8, "ble.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_R4, "ble.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_R8, "ble.r8", 3, Pop2, Push0, MintOpBranch)
-
-OPDEF(MINT_BNE_UN_I4, "bne.un.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BNE_UN_I8, "bne.un.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BNE_UN_R4, "bne.un.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BNE_UN_R8, "bne.un.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_UN_I4, "bge.un.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_UN_I8, "bge.un.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_UN_R4, "bge.un.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGE_UN_R8, "bge.un.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_UN_I4, "bgt.un.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_UN_I8, "bgt.un.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_UN_R4, "bgt.un.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BGT_UN_R8, "bgt.un.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_UN_I4, "ble.un.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_UN_I8, "ble.un.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_UN_R4, "ble.un.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLE_UN_R8, "ble.un.r8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_UN_I4, "blt.un.i4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_UN_I8, "blt.un.i8", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_UN_R4, "blt.un.r4", 3, Pop2, Push0, MintOpBranch)
-OPDEF(MINT_BLT_UN_R8, "blt.un.r8", 3, Pop2, Push0, MintOpBranch)
-
-OPDEF(MINT_BEQ_I4_S, "beq.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BEQ_I8_S, "beq.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BEQ_R4_S, "beq.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BEQ_R8_S, "beq.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_I4_S, "bge.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_I8_S, "bge.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_R4_S, "bge.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_R8_S, "bge.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_I4_S, "bgt.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_I8_S, "bgt.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_R4_S, "bgt.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_R8_S, "bgt.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_I4_S, "blt.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_I8_S, "blt.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_R4_S, "blt.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_R8_S, "blt.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_I4_S, "ble.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_I8_S, "ble.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_R4_S, "ble.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_R8_S, "ble.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-
-OPDEF(MINT_BNE_UN_I4_S, "bne.un.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BNE_UN_I8_S, "bne.un.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BNE_UN_R4_S, "bne.un.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BNE_UN_R8_S, "bne.un.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_UN_I4_S, "bge.un.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_UN_I8_S, "bge.un.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_UN_R4_S, "bge.un.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGE_UN_R8_S, "bge.un.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_UN_I4_S, "bgt.un.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_UN_I8_S, "bgt.un.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_UN_R4_S, "bgt.un.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BGT_UN_R8_S, "bgt.un.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_UN_I4_S, "ble.un.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_UN_I8_S, "ble.un.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_UN_R4_S, "ble.un.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLE_UN_R8_S, "ble.un.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_UN_I4_S, "blt.un.i4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_UN_I8_S, "blt.un.i8.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_UN_R4_S, "blt.un.r4.s", 2, Pop2, Push0, MintOpShortBranch)
-OPDEF(MINT_BLT_UN_R8_S, "blt.un.r8.s", 2, Pop2, Push0, MintOpShortBranch)
-
-OPDEF(MINT_SWITCH, "switch", 0, Pop1, Push0, MintOpSwitch)
-
-OPDEF(MINT_LDSTR, "ldstr", 2, Pop0, Push1, MintOpMethodToken) /* not really */
-OPDEF(MINT_LDSTR_TOKEN, "ldstr.token", 2, Pop0, Push1, MintOpMethodToken) /* not really */
-
-OPDEF(MINT_JMP, "jmp", 2, Pop0, Push0, MintOpMethodToken)
-
-OPDEF(MINT_ENDFILTER, "endfilter", 1, Pop0, Push0, MintOpNoArgs)
-
-OPDEF(MINT_NEWOBJ, "newobj", 3, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_ARRAY, "newobj_array", 3, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_STRING, "newobj_string", 3, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_FAST, "newobj_fast", 5, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_VT_FAST, "newobj_vt_fast", 5, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_NEWOBJ_MAGIC, "newobj_magic", 2, Pop0, Push0, MintOpMethodToken)
-OPDEF(MINT_INITOBJ, "initobj", 3, Pop1, Push0, MintOpInt)
-OPDEF(MINT_CASTCLASS, "castclass", 2, Pop0, Push0, MintOpClassToken)
-OPDEF(MINT_ISINST, "isinst", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_CASTCLASS_INTERFACE, "castclass.interface", 2, Pop0, Push0, MintOpClassToken)
-OPDEF(MINT_ISINST_INTERFACE, "isinst.interface", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_CASTCLASS_COMMON, "castclass.common", 2, Pop0, Push0, MintOpClassToken)
-OPDEF(MINT_ISINST_COMMON, "isinst.common", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_NEWARR, "newarr", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_BOX, "box", 2, Pop1, Push1, MintOpShortInt)
-OPDEF(MINT_BOX_VT, "box.vt", 2, Pop1, Push1, MintOpShortInt)
-OPDEF(MINT_BOX_PTR, "box.ptr", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_BOX_NULLABLE_PTR, "box.nullable.ptr", 3, Pop0, Push0, MintOpTwoShorts)
-OPDEF(MINT_UNBOX, "unbox", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_LDTOKEN, "ldtoken", 2, Pop0, Push1, MintOpClassToken) /* not really */
-OPDEF(MINT_LDFTN, "ldftn", 2, Pop0, Push1, MintOpMethodToken)
-OPDEF(MINT_LDFTN_DYNAMIC, "ldftn.dynamic", 1, Pop1, Push1, MintOpMethodToken)
-OPDEF(MINT_LDVIRTFTN, "ldvirtftn", 2, Pop1, Push1, MintOpMethodToken)
-OPDEF(MINT_CPOBJ, "cpobj", 2, Pop2, Push0, MintOpClassToken)
-OPDEF(MINT_CPOBJ_VT, "cpobj.vt", 2, Pop2, Push0, MintOpClassToken)
-OPDEF(MINT_LDOBJ_VT, "ldobj.vt", 3, Pop1, Push1, MintOpInt)
-OPDEF(MINT_STOBJ_VT, "stobj.vt", 2, Pop2, Push0, MintOpClassToken)
-OPDEF(MINT_CPBLK, "cpblk", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_INITBLK, "initblk", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_LOCALLOC, "localloc", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INITLOCALS, "initlocals", 3, Pop0, Push0, MintOpTwoShorts)
-
-OPDEF(MINT_LDELEM_I, "ldelem.i", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_I1, "ldelem.i1", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_U1, "ldelem.u1", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_I2, "ldelem.i2", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_U2, "ldelem.u2", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_I4, "ldelem.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_U4, "ldelem.u4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_I8, "ldelem.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_R4, "ldelem.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_R8, "ldelem.r8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_REF, "ldelem.ref", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_LDELEM_VT, "ldelem.vt", 3, Pop2, Push1, MintOpInt)
-
-OPDEF(MINT_LDELEMA1, "ldelema1", 3, Pop2, Push1, MintOpInt)
-OPDEF(MINT_LDELEMA, "ldelema", 4, VarPop, Push1, MintOpTwoShorts)
-OPDEF(MINT_LDELEMA_TC, "ldelema.tc", 3, VarPop, Push1, MintOpTwoShorts)
-
-OPDEF(MINT_STELEM_I, "stelem.i", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_I1, "stelem.i1", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_U1, "stelem.u1", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_I2, "stelem.i2", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_U2, "stelem.u2", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_I4, "stelem.i4", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_I8, "stelem.i8", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_R4, "stelem.r4", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_R8, "stelem.r8", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_REF, "stelem.ref", 1, Pop3, Push0, MintOpNoArgs)
-OPDEF(MINT_STELEM_VT, "stelem.vt", 4, Pop3, Push0, MintOpShortAndInt)
-
-OPDEF(MINT_LDLEN, "ldlen", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LDLEN_SPAN, "ldlen.span", 2, Pop1, Push1, MintOpShortInt)
-
-OPDEF(MINT_GETITEM_SPAN, "getitem.span", 4, Pop2, Push1, MintOpShortAndInt)
+OPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_NIY, "niy", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_BREAK, "break", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_BREAKPOINT, "breakpoint", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_LDNULL, "ldnull", 2, 1, 0, MintOpNoArgs)
+
+OPDEF(MINT_RET, "ret", 2, 0, 1, MintOpNoArgs)
+OPDEF(MINT_RET_VOID, "ret.void", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_RET_VT, "ret.vt", 3, 0, 1, MintOpShortInt)
+OPDEF(MINT_RET_LOCALLOC, "ret.localloc", 2, 0, 1, MintOpNoArgs)
+OPDEF(MINT_RET_VOID_LOCALLOC, "ret.void.localloc", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_RET_VT_LOCALLOC, "ret.vt.localloc", 3, 0, 1, MintOpShortInt)
+
+OPDEF(MINT_LDC_I4_M1, "ldc.i4.m1", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_0, "ldc.i4.0", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_1, "ldc.i4.1", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_2, "ldc.i4.2", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_3, "ldc.i4.3", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_4, "ldc.i4.4", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_5, "ldc.i4.5", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_6, "ldc.i4.6", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_7, "ldc.i4.7", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_LDC_I4_8, "ldc.i4.8", 2, 1, 0, MintOpNoArgs)
+
+OPDEF(MINT_LDC_I4_S, "ldc.i4.s", 3, 1, 0, MintOpShortInt)
+OPDEF(MINT_LDC_I4, "ldc.i4", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDC_I8, "ldc.i8", 6, 1, 0, MintOpLongInt)
+OPDEF(MINT_LDC_I8_S, "ldc.i8.s", 3, 1, 0, MintOpShortInt)
+
+OPDEF(MINT_LDC_R4, "ldc.r4", 4, 1, 0, MintOpFloat)
+OPDEF(MINT_LDC_R8, "ldc.r8", 6, 1, 0, MintOpDouble)
+
+OPDEF(MINT_INIT_ARGLIST, "init_arglist", 3, 1, 0, MintOpNoArgs)
+
+OPDEF(MINT_LDFLD_VT_I1, "ldfld.vt.i1", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_U1, "ldfld.vt.u1", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_I2, "ldfld.vt.i2", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_U2, "ldfld.vt.u2", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_I4, "ldfld.vt.i4", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_I8, "ldfld.vt.i8", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_R4, "ldfld.vt.r4", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_R8, "ldfld.vt.r8", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_O, "ldfld.vt.o", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_VT, "ldfld.vt.vt", 5, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_I8_UNALIGNED, "ldfld.vt.i8.unaligned", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_VT_R8_UNALIGNED, "ldfld.vt.r8.unaligned", 4, 1, 1, MintOpShortInt)
+
+OPDEF(MINT_LDFLD_I1, "ldfld.i1", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_U1, "ldfld.u1", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_I2, "ldfld.i2", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_U2, "ldfld.u2", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_I4, "ldfld.i4", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_I8, "ldfld.i8", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_R4, "ldfld.r4", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_R8, "ldfld.r8", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_O, "ldfld.o", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_VT, "ldfld.vt", 5, 1, 1, MintOpShortInt)
+OPDEF(MINT_LDFLD_I8_UNALIGNED, "ldfld.i8.unaligned", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLD_R8_UNALIGNED, "ldfld.r8.unaligned", 4, 1, 1, MintOpUShortInt)
+
+OPDEF(MINT_LDRMFLD, "ldrmfld", 4, 1, 1, MintOpFieldToken)
+OPDEF(MINT_LDRMFLD_VT, "ldrmfld.vt", 4, 1, 1, MintOpUShortInt)
+
+OPDEF(MINT_LDFLDA, "ldflda", 4, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDFLDA_UNSAFE, "ldflda.unsafe", 4, 1, 1, MintOpUShortInt)
+
+OPDEF(MINT_STFLD_I1, "stfld.i1", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_U1, "stfld.u1", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_I2, "stfld.i2", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_U2, "stfld.u2", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_I4, "stfld.i4", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_I8, "stfld.i8", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_R4, "stfld.r4", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_R8, "stfld.r8", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_O, "stfld.o", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_VT, "stfld.vt", 5, 0, 2, MintOpTwoShorts)
+OPDEF(MINT_STFLD_VT_NOREF, "stfld.vt.noref", 5, 0, 2, MintOpTwoShorts)
+OPDEF(MINT_STFLD_I8_UNALIGNED, "stfld.i8.unaligned", 4, 0, 2, MintOpUShortInt)
+OPDEF(MINT_STFLD_R8_UNALIGNED, "stfld.r8.unaligned", 4, 0, 2, MintOpUShortInt)
+
+OPDEF(MINT_STRMFLD, "strmfld", 4, 0, 2, MintOpFieldToken)
+OPDEF(MINT_STRMFLD_VT, "strmfld.vt", 4, 0, 2, MintOpUShortInt)
+
+OPDEF(MINT_LDTSFLD_I1, "ldtsfld.i1", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_U1, "ldtsfld.u1", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_I2, "ldtsfld.i2", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_U2, "ldtsfld.u2", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_I4, "ldtsfld.i4", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_I8, "ldtsfld.i8", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_R4, "ldtsfld.r4", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_R8, "ldtsfld.r8", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDTSFLD_O, "ldtsfld.o", 4, 1, 0, MintOpInt)
+OPDEF(MINT_LDSSFLD, "ldssfld", 5, 1, 0, MintOpFieldToken)
+OPDEF(MINT_LDSSFLD_VT, "ldssfld.vt", 5, 1, 0, MintOpInt)
+
+OPDEF(MINT_LDSFLD_I1, "ldsfld.i1", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_U1, "ldsfld.u1", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_I2, "ldsfld.i2", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_U2, "ldsfld.u2", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_I4, "ldsfld.i4", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_I8, "ldsfld.i8", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_R4, "ldsfld.r4", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_R8, "ldsfld.r8", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_O, "ldsfld.o", 4, 1, 0, MintOpUShortInt)
+OPDEF(MINT_LDSFLD_VT, "ldsfld.vt", 5, 1, 0, MintOpTwoShorts)
+
+OPDEF(MINT_STTSFLD_I1, "sttsfld.i1", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_U1, "sttsfld.u1", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_I2, "sttsfld.i2", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_U2, "sttsfld.u2", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_I4, "sttsfld.i4", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_I8, "sttsfld.i8", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_R4, "sttsfld.r4", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_R8, "sttsfld.r8", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STTSFLD_O, "sttsfld.o", 4, 0, 1, MintOpInt)
+OPDEF(MINT_STSSFLD, "stssfld", 5, 0, 1, MintOpFieldToken)
+OPDEF(MINT_STSSFLD_VT, "stssfld.vt", 5, 0, 1, MintOpInt)
+
+OPDEF(MINT_STSFLD_I1, "stsfld.i1", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_U1, "stsfld.u1", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_I2, "stsfld.i2", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_U2, "stsfld.u2", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_I4, "stsfld.i4", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_I8, "stsfld.i8", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_R4, "stsfld.r4", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_R8, "stsfld.r8", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_O, "stsfld.o", 4, 0, 1, MintOpUShortInt)
+OPDEF(MINT_STSFLD_VT, "stsfld.vt", 5, 0, 1, MintOpTwoShorts)
+OPDEF(MINT_LDSFLDA, "ldsflda", 4, 1, 0, MintOpTwoShorts)
+OPDEF(MINT_LDSSFLDA, "ldssflda", 4, 1, 0, MintOpInt)
+
+OPDEF(MINT_MOV_I1, "mov.i1", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_U1, "mov.u1", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_I2, "mov.i2", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_U2, "mov.u2", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_4, "mov.4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_8, "mov.8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_MOV_VT, "mov.vt", 4, 1, 1, MintOpShortInt)
+
+OPDEF(MINT_LDLOCA_S, "ldloca.s", 3, 1, 0, MintOpUShortInt)
+
+OPDEF(MINT_LDIND_I1_CHECK, "ldind.i1.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_U1_CHECK, "ldind.u1.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_I2_CHECK, "ldind.i2.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_U2_CHECK, "ldind.u2.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_I4_CHECK, "ldind.i4.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_U4_CHECK, "ldind.u4.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_I8_CHECK, "ldind.i8.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_I, "ldind.i", 3, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDIND_I8, "ldind.i8", 3, 1, 1, MintOpUShortInt)
+OPDEF(MINT_LDIND_R4_CHECK, "ldind.r4.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_R8_CHECK, "ldind.r8.check", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_REF, "ldind.ref", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDIND_REF_CHECK, "ldind.ref.check", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_STIND_I1, "stind.i1", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_I2, "stind.i2", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_I4, "stind.i4", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_I8, "stind.i8", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_I, "stind.i", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_R4, "stind.r4", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_R8, "stind.r8", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_STIND_REF, "stind.ref", 3, 0, 2, MintOpNoArgs)
+
+OPDEF(MINT_BR, "br", 3, 0, 0, MintOpBranch)
+OPDEF(MINT_LEAVE, "leave", 3, 0, 0, MintOpBranch)
+OPDEF(MINT_LEAVE_CHECK, "leave.check", 3, 0, 0, MintOpBranch)
+OPDEF(MINT_BR_S, "br.s", 2, 0, 0, MintOpShortBranch)
+OPDEF(MINT_LEAVE_S, "leave.s", 2, 0, 0, MintOpShortBranch)
+OPDEF(MINT_LEAVE_S_CHECK, "leave.s.check", 2, 0, 0, MintOpShortBranch)
+OPDEF(MINT_CALL_HANDLER, "call_handler", 4, 0, 0, MintOpBranch)
+OPDEF(MINT_CALL_HANDLER_S, "call_handler.s", 3, 0, 0, MintOpShortBranch)
+
+OPDEF(MINT_THROW, "throw", 2, 0, 1, MintOpNoArgs)
+OPDEF(MINT_RETHROW, "rethrow", 2, 0, 0, MintOpUShortInt)
+OPDEF(MINT_ENDFINALLY, "endfinally", 2, 0, 0, MintOpShortInt)
+OPDEF(MINT_MONO_RETHROW, "mono_rethrow", 2, 0, 1, MintOpNoArgs)
+
+OPDEF(MINT_CHECKPOINT, "checkpoint", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_SAFEPOINT, "safepoint", 1, 0, 0, MintOpNoArgs)
+
+OPDEF(MINT_BRFALSE_I4, "brfalse.i4", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRFALSE_I8, "brfalse.i8", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRFALSE_R4, "brfalse.r4", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRFALSE_R8, "brfalse.r8", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRTRUE_I4, "brtrue.i4", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRTRUE_I8, "brtrue.i8", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRTRUE_R4, "brtrue.r4", 4, 0, 1, MintOpBranch)
+OPDEF(MINT_BRTRUE_R8, "brtrue.r8", 4, 0, 1, MintOpBranch)
+
+OPDEF(MINT_BRFALSE_I4_S, "brfalse.i4.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRFALSE_I8_S, "brfalse.i8.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRFALSE_R4_S, "brfalse.r4.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRFALSE_R8_S, "brfalse.r8.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRTRUE_I4_S, "brtrue.i4.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRTRUE_I8_S, "brtrue.i8.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRTRUE_R4_S, "brtrue.r4.s", 3, 0, 1, MintOpShortBranch)
+OPDEF(MINT_BRTRUE_R8_S, "brtrue.r8.s", 3, 0, 1, MintOpShortBranch)
+
+OPDEF(MINT_BEQ_I4, "beq.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BEQ_I8, "beq.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BEQ_R4, "beq.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BEQ_R8, "beq.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_I4, "bge.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_I8, "bge.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_R4, "bge.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_R8, "bge.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_I4, "bgt.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_I8, "bgt.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_R4, "bgt.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_R8, "bgt.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_I4, "blt.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_I8, "blt.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_R4, "blt.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_R8, "blt.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_I4, "ble.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_I8, "ble.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_R4, "ble.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_R8, "ble.r8", 5, 0, 2, MintOpBranch)
+
+OPDEF(MINT_BNE_UN_I4, "bne.un.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BNE_UN_I8, "bne.un.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BNE_UN_R4, "bne.un.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BNE_UN_R8, "bne.un.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_UN_I4, "bge.un.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_UN_I8, "bge.un.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_UN_R4, "bge.un.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGE_UN_R8, "bge.un.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_UN_I4, "bgt.un.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_UN_I8, "bgt.un.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_UN_R4, "bgt.un.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BGT_UN_R8, "bgt.un.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_UN_I4, "ble.un.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_UN_I8, "ble.un.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_UN_R4, "ble.un.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLE_UN_R8, "ble.un.r8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_UN_I4, "blt.un.i4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_UN_I8, "blt.un.i8", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_UN_R4, "blt.un.r4", 5, 0, 2, MintOpBranch)
+OPDEF(MINT_BLT_UN_R8, "blt.un.r8", 5, 0, 2, MintOpBranch)
+
+OPDEF(MINT_BEQ_I4_S, "beq.i4.s", 4, 0, 2, MintOpShortBranch) /* _S branch forms: all share 4, 0, 2 and MintOpShortBranch, vs. 5 and MintOpBranch on the blt.un.* entries just above */
+OPDEF(MINT_BEQ_I8_S, "beq.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BEQ_R4_S, "beq.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BEQ_R8_S, "beq.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_I4_S, "bge.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_I8_S, "bge.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_R4_S, "bge.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_R8_S, "bge.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_I4_S, "bgt.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_I8_S, "bgt.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_R4_S, "bgt.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_R8_S, "bgt.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_I4_S, "blt.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_I8_S, "blt.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_R4_S, "blt.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_R8_S, "blt.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_I4_S, "ble.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_I8_S, "ble.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_R4_S, "ble.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_R8_S, "ble.r8.s", 4, 0, 2, MintOpShortBranch)
+
+OPDEF(MINT_BNE_UN_I4_S, "bne.un.i4.s", 4, 0, 2, MintOpShortBranch) /* .un variants of the _S forms; same field values as the group above */
+OPDEF(MINT_BNE_UN_I8_S, "bne.un.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BNE_UN_R4_S, "bne.un.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BNE_UN_R8_S, "bne.un.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_UN_I4_S, "bge.un.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_UN_I8_S, "bge.un.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_UN_R4_S, "bge.un.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGE_UN_R8_S, "bge.un.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_UN_I4_S, "bgt.un.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_UN_I8_S, "bgt.un.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_UN_R4_S, "bgt.un.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BGT_UN_R8_S, "bgt.un.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_UN_I4_S, "ble.un.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_UN_I8_S, "ble.un.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_UN_R4_S, "ble.un.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLE_UN_R8_S, "ble.un.r8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_UN_I4_S, "blt.un.i4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_UN_I8_S, "blt.un.i8.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_UN_R4_S, "blt.un.r4.s", 4, 0, 2, MintOpShortBranch)
+OPDEF(MINT_BLT_UN_R8_S, "blt.un.r8.s", 4, 0, 2, MintOpShortBranch)
+
+OPDEF(MINT_SWITCH, "switch", 0, 0, 1, MintOpSwitch) /* length field is 0, the only such entry in this stretch; presumably the real length varies per instruction - confirm */
+
+OPDEF(MINT_LDSTR, "ldstr", 3, 1, 0, MintOpShortInt) /* fields four and five look like destination/source register counts (the commit text speaks of explicit dreg and sregs) - confirm against the OPDEF consumers */
+OPDEF(MINT_LDSTR_TOKEN, "ldstr.token", 3, 1, 0, MintOpShortInt)
+
+OPDEF(MINT_JMP, "jmp", 2, 0, 0, MintOpMethodToken)
+
+OPDEF(MINT_ENDFILTER, "endfilter", 2, 0, 1, MintOpNoArgs)
+
+OPDEF(MINT_NEWOBJ, "newobj", 4, CallArgs, 0, MintOpMethodToken) /* CallArgs sits in field four for all five newobj forms; its definition is not in this hunk */
+OPDEF(MINT_NEWOBJ_ARRAY, "newobj_array", 4, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_NEWOBJ_STRING, "newobj_string", 4, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_NEWOBJ_FAST, "newobj_fast", 6, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_NEWOBJ_VT_FAST, "newobj_vt_fast", 6, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_INITOBJ, "initobj", 3, 0, 1, MintOpShortInt)
+OPDEF(MINT_CASTCLASS, "castclass", 4, 1, 1, MintOpClassToken) /* the six castclass/isinst forms share 4, 1, 1, MintOpClassToken */
+OPDEF(MINT_ISINST, "isinst", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_CASTCLASS_INTERFACE, "castclass.interface", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_ISINST_INTERFACE, "isinst.interface", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_CASTCLASS_COMMON, "castclass.common", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_ISINST_COMMON, "isinst.common", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_NEWARR, "newarr", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_BOX, "box", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_BOX_VT, "box.vt", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_BOX_PTR, "box.ptr", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_BOX_NULLABLE_PTR, "box.nullable.ptr", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_UNBOX, "unbox", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_LDTOKEN, "ldtoken", 3, 1, 0, MintOpShortInt)
+OPDEF(MINT_LDFTN, "ldftn", 3, 1, 0, MintOpMethodToken)
+OPDEF(MINT_LDFTN_DYNAMIC, "ldftn.dynamic", 3, 1, 1, MintOpMethodToken) /* NOTE(review): length 3 like ldftn despite the extra 1 in field five, so there may be no token slot for MintOpMethodToken to describe - confirm */
+OPDEF(MINT_LDVIRTFTN, "ldvirtftn", 4, 1, 1, MintOpMethodToken)
+OPDEF(MINT_CPOBJ, "cpobj", 4, 0, 2, MintOpClassToken)
+OPDEF(MINT_CPOBJ_VT, "cpobj.vt", 4, 0, 2, MintOpClassToken)
+OPDEF(MINT_LDOBJ_VT, "ldobj.vt", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_STOBJ_VT, "stobj.vt", 4, 0, 2, MintOpClassToken)
+OPDEF(MINT_CPBLK, "cpblk", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_INITBLK, "initblk", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_LOCALLOC, "localloc", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INITLOCALS, "initlocals", 3, 0, 0, MintOpTwoShorts)
+
+OPDEF(MINT_LDELEM_I, "ldelem.i", 4, 1, 2, MintOpNoArgs) /* scalar ldelem.*: 4, 1, 2 throughout; only ldelem.vt below differs (5, MintOpShortInt) */
+OPDEF(MINT_LDELEM_I1, "ldelem.i1", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_U1, "ldelem.u1", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_I2, "ldelem.i2", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_U2, "ldelem.u2", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_I4, "ldelem.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_U4, "ldelem.u4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_I8, "ldelem.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_R4, "ldelem.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_R8, "ldelem.r8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_REF, "ldelem.ref", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_LDELEM_VT, "ldelem.vt", 5, 1, 2, MintOpShortInt)
+
+OPDEF(MINT_LDELEMA1, "ldelema1", 5, 1, 2, MintOpShortInt)
+OPDEF(MINT_LDELEMA, "ldelema", 4, CallArgs, 0, MintOpTwoShorts) /* ldelema and ldelema.tc use CallArgs where ldelema1 lists 1, 2; NOTE(review): their lengths differ (4 vs 3) under the same MintOpTwoShorts - confirm both */
+OPDEF(MINT_LDELEMA_TC, "ldelema.tc", 3, CallArgs, 0, MintOpTwoShorts)
+
+OPDEF(MINT_STELEM_I, "stelem.i", 4, 0, 3, MintOpNoArgs) /* scalar stelem.*: 4, 0, 3 throughout; stelem.vt below is 6 with MintOpTwoShorts */
+OPDEF(MINT_STELEM_I1, "stelem.i1", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_U1, "stelem.u1", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_I2, "stelem.i2", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_U2, "stelem.u2", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_I4, "stelem.i4", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_I8, "stelem.i8", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_R4, "stelem.r4", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_R8, "stelem.r8", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_REF, "stelem.ref", 4, 0, 3, MintOpNoArgs)
+OPDEF(MINT_STELEM_VT, "stelem.vt", 6, 0, 3, MintOpTwoShorts)
+
+OPDEF(MINT_LDLEN, "ldlen", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LDLEN_SPAN, "ldlen.span", 4, 1, 1, MintOpShortInt)
+
+OPDEF(MINT_GETITEM_SPAN, "getitem.span", 7, 1, 2, MintOpTwoShorts) /* NOTE(review): length 7 looks like one slot more than 1, 2 plus two shorts would need - confirm against the handler */
 
 /* binops */
-OPDEF(MINT_ADD_I4, "add.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD_I8, "add.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD_R4, "add.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD_R8, "add.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_SUB_I4, "sub.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB_I8, "sub.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB_R4, "sub.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB_R8, "sub.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_MUL_I4, "mul.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MUL_I8, "mul.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MUL_R4, "mul.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MUL_R8, "mul.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_DIV_I4, "div.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_DIV_I8, "div.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_DIV_R4, "div.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_DIV_R8, "div.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_DIV_UN_I4, "div.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_DIV_UN_I8, "div.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_ADD_OVF_I4, "add.ovf.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD_OVF_I8, "add.ovf.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_ADD_OVF_UN_I4, "add.ovf.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD_OVF_UN_I8, "add.ovf.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_MUL_OVF_I4, "mul.ovf.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MUL_OVF_I8, "mul.ovf.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_MUL_OVF_UN_I4, "mul.ovf.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MUL_OVF_UN_I8, "mul.ovf.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_SUB_OVF_I4, "sub.ovf.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB_OVF_I8, "sub.ovf.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_SUB_OVF_UN_I4, "sub.ovf.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB_OVF_UN_I8, "sub.ovf.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_AND_I4, "and.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_AND_I8, "and.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_OR_I4, "or.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_OR_I8, "or.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_XOR_I4, "xor.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_XOR_I8, "xor.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_REM_I4, "rem.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_REM_I8, "rem.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_REM_R4, "rem.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_REM_R8, "rem.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_REM_UN_I4, "rem.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_REM_UN_I8, "rem.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_SHR_UN_I4, "shr.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SHR_UN_I8, "shr.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SHL_I4, "shl.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SHL_I8, "shl.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SHR_I4, "shr.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SHR_I8, "shr.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CEQ_I4, "ceq.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CEQ_I8, "ceq.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CEQ_R4, "ceq.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CEQ_R8, "ceq.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CNE_I4, "cne.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CNE_I8, "cne.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CNE_R4, "cne.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CNE_R8, "cne.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CGT_I4, "cgt.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_I8, "cgt.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_R4, "cgt.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_R8, "cgt.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CGE_I4, "cge.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGE_I8, "cge.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGE_R4, "cge.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGE_R8, "cge.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CGE_UN_I4, "cge.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGE_UN_I8, "cge.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CGT_UN_I4, "cgt.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_UN_I8, "cgt.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_UN_R4, "cgt.un.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CGT_UN_R8, "cgt.un.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CLT_I4, "clt.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_I8, "clt.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_R4, "clt.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_R8, "clt.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CLE_I4, "cle.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLE_I8, "cle.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLE_R4, "cle.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLE_R8, "cle.r8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CLE_UN_I4, "cle.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLE_UN_I8, "cle.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CLT_UN_I4, "clt.un.i4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_UN_I8, "clt.un.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_UN_R4, "clt.un.r4", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CLT_UN_R8, "clt.un.r8", 1, Pop2, Push1, MintOpNoArgs)
+OPDEF(MINT_ADD_I4, "add.i4", 4, 1, 2, MintOpNoArgs) /* binops, new form: every entry through clt.un.r8 is 4, 1, 2 in place of the removed 1, Pop2, Push1; names, strings and order are unchanged */
+OPDEF(MINT_ADD_I8, "add.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_ADD_R4, "add.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_ADD_R8, "add.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_SUB_I4, "sub.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SUB_I8, "sub.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SUB_R4, "sub.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SUB_R8, "sub.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_MUL_I4, "mul.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MUL_I8, "mul.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MUL_R4, "mul.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MUL_R8, "mul.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_DIV_I4, "div.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_DIV_I8, "div.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_DIV_R4, "div.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_DIV_R8, "div.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_DIV_UN_I4, "div.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_DIV_UN_I8, "div.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_ADD_OVF_I4, "add.ovf.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_ADD_OVF_I8, "add.ovf.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_ADD_OVF_UN_I4, "add.ovf.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_ADD_OVF_UN_I8, "add.ovf.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_MUL_OVF_I4, "mul.ovf.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MUL_OVF_I8, "mul.ovf.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_MUL_OVF_UN_I4, "mul.ovf.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MUL_OVF_UN_I8, "mul.ovf.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_SUB_OVF_I4, "sub.ovf.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SUB_OVF_I8, "sub.ovf.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_SUB_OVF_UN_I4, "sub.ovf.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SUB_OVF_UN_I8, "sub.ovf.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_AND_I4, "and.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_AND_I8, "and.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_OR_I4, "or.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_OR_I8, "or.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_XOR_I4, "xor.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_XOR_I8, "xor.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_REM_I4, "rem.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_REM_I8, "rem.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_REM_R4, "rem.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_REM_R8, "rem.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_REM_UN_I4, "rem.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_REM_UN_I8, "rem.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_SHR_UN_I4, "shr.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SHR_UN_I8, "shr.un.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SHL_I4, "shl.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SHL_I8, "shl.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SHR_I4, "shr.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SHR_I8, "shr.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CEQ_I4, "ceq.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CEQ_I8, "ceq.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CEQ_R4, "ceq.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CEQ_R8, "ceq.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CNE_I4, "cne.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CNE_I8, "cne.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CNE_R4, "cne.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CNE_R8, "cne.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CGT_I4, "cgt.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_I8, "cgt.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_R4, "cgt.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_R8, "cgt.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CGE_I4, "cge.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGE_I8, "cge.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGE_R4, "cge.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGE_R8, "cge.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CGE_UN_I4, "cge.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGE_UN_I8, "cge.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CGT_UN_I4, "cgt.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_UN_I8, "cgt.un.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_UN_R4, "cgt.un.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CGT_UN_R8, "cgt.un.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CLT_I4, "clt.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_I8, "clt.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_R4, "clt.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_R8, "clt.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CLE_I4, "cle.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLE_I8, "cle.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLE_R4, "cle.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLE_R8, "cle.r8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CLE_UN_I4, "cle.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLE_UN_I8, "cle.un.i8", 4, 1, 2, MintOpNoArgs)
+
+OPDEF(MINT_CLT_UN_I4, "clt.un.i4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_UN_I8, "clt.un.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_UN_R4, "clt.un.r4", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CLT_UN_R8, "clt.un.r8", 4, 1, 2, MintOpNoArgs)
 /* binops end */
 
-OPDEF(MINT_LOCADD1_I4, "locadd1.i4", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_LOCADD1_I8, "locadd1.i8", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_LOCSUB1_I4, "locsub1.i4", 2, Pop0, Push0, MintOpUShortInt)
-OPDEF(MINT_LOCSUB1_I8, "locsub1.i8", 2, Pop0, Push0, MintOpUShortInt)
-
 /* unops */
-OPDEF(MINT_ADD1_I4, "add1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ADD1_I8, "add1.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB1_I4, "sub1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SUB1_I8, "sub1.i8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_NEG_I4, "neg.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_NEG_I8, "neg.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_NEG_R4, "neg.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_NEG_R8, "neg.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_NOT_I4, "not.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_NOT_I8, "not.i8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_R_UN_I4, "conv.r.un.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_R_UN_I8, "conv.r.un.i8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_I1_I4, "conv.i1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I1_I8, "conv.i1.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I1_R4, "conv.i1.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I1_R8, "conv.i1.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_U1_I4, "conv.u1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U1_I8, "conv.u1.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U1_R4, "conv.u1.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U1_R8, "conv.u1.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_I2_I4, "conv.i2.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I2_I8, "conv.i2.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I2_R4, "conv.i2.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I2_R8, "conv.i2.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_U2_I4, "conv.u2.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U2_I8, "conv.u2.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U2_R4, "conv.u2.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U2_R8, "conv.u2.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_I4_I8, "conv.i4.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I4_R4, "conv.i4.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I4_R8, "conv.i4.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_U4_I8, "conv.u4.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U4_R4, "conv.u4.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U4_R8, "conv.u4.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_I8_I4, "conv.i8.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I8_U4, "conv.i8.u4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I8_R4, "conv.i8.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_I8_R8, "conv.i8.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_R4_I4, "conv.r4.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_R4_I8, "conv.r4.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_R4_R8, "conv.r4.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_R8_I4, "conv.r8.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_R8_I8, "conv.r8.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_R8_R4, "conv.r8.r4", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_U8_R4, "conv.u8.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_U8_R8, "conv.u8.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I1_I4, "conv.ovf.i1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_I8, "conv.ovf.i1.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_R4, "conv.ovf.i1.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_R8, "conv.ovf.i1.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I1_U4, "conv.ovf.i1.u4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_U8, "conv.ovf.i1.u8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_UN_R4, "conv.ovf.i1.un.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I1_UN_R8, "conv.ovf.i1.un.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_U1_I4, "conv.ovf.u1.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U1_I8, "conv.ovf.u1.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U1_R4, "conv.ovf.u1.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U1_R8, "conv.ovf.u1.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I2_I4, "conv.ovf.i2.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_I8, "conv.ovf.i2.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_R4, "conv.ovf.i2.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_R8, "conv.ovf.i2.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I2_U4, "conv.ovf.i2.u4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_U8, "conv.ovf.i2.u8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_UN_R4, "conv.ovf.i2.un.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I2_UN_R8, "conv.ovf.i2.un.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_U2_I4, "conv.ovf.u2.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U2_I8, "conv.ovf.u2.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U2_R4, "conv.ovf.u2.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U2_R8, "conv.ovf.u2.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I4_U4, "conv.ovf.i4.u4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I4_I8, "conv.ovf.i4.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I4_U8, "conv.ovf.i4.u8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I4_R4, "conv.ovf.i4.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I4_R8, "conv.ovf.i4.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I4_UN_R8, "conv.ovf.i4.un.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_U4_I4, "conv.ovf.u4.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U4_I8, "conv.ovf.u4.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U4_R4, "conv.ovf.u4.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U4_R8, "conv.ovf.u4.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I8_U8, "conv.ovf.i8.u8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I8_R4, "conv.ovf.i8.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I8_R8, "conv.ovf.i8.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_I8_UN_R8, "conv.ovf.i8.un.r8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_I8_UN_R4, "conv.ovf.i8.un.r4", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_OVF_U8_I4, "conv.ovf.u8.i4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U8_I8, "conv.ovf.u8.i8", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U8_R4, "conv.ovf.u8.r4", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CONV_OVF_U8_R8, "conv.ovf.u8.r8", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CEQ0_I4, "ceq0.i4", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_CONV_I4_I8_SP, "conv.i4.i8.sp", 1, Pop2, Push2, MintOpNoArgs) /* special for narrowing sp[-2] on 64 bits */
-OPDEF(MINT_CONV_I8_I4_SP, "conv.i8.i4.sp", 1, Pop2, Push2, MintOpNoArgs) /* special for widening sp[-2] on 64 bits */
-OPDEF(MINT_CONV_R8_R4_SP, "conv.r8.r4.sp", 1, Pop2, Push2, MintOpNoArgs)
+OPDEF(MINT_ADD1_I4, "add1.i4", 3, 1, 1, MintOpNoArgs) /* unops, new form: every entry through ceq0.i4 is 3, 1, 1 in place of the removed 1, Pop1, Push1 */
+OPDEF(MINT_ADD1_I8, "add1.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SUB1_I4, "sub1.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SUB1_I8, "sub1.i8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_NEG_I4, "neg.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_NEG_I8, "neg.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_NEG_R4, "neg.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_NEG_R8, "neg.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_NOT_I4, "not.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_NOT_I8, "not.i8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_R_UN_I4, "conv.r.un.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_R_UN_I8, "conv.r.un.i8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_I1_I4, "conv.i1.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I1_I8, "conv.i1.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I1_R4, "conv.i1.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I1_R8, "conv.i1.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_U1_I4, "conv.u1.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U1_I8, "conv.u1.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U1_R4, "conv.u1.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U1_R8, "conv.u1.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_I2_I4, "conv.i2.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I2_I8, "conv.i2.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I2_R4, "conv.i2.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I2_R8, "conv.i2.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_U2_I4, "conv.u2.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U2_I8, "conv.u2.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U2_R4, "conv.u2.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U2_R8, "conv.u2.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_I4_I8, "conv.i4.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I4_R4, "conv.i4.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I4_R8, "conv.i4.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_U4_I8, "conv.u4.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U4_R4, "conv.u4.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U4_R8, "conv.u4.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_I8_I4, "conv.i8.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I8_U4, "conv.i8.u4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I8_R4, "conv.i8.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_I8_R8, "conv.i8.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_R4_I4, "conv.r4.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_R4_I8, "conv.r4.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_R4_R8, "conv.r4.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_R8_I4, "conv.r8.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_R8_I8, "conv.r8.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_R8_R4, "conv.r8.r4", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_U8_R4, "conv.u8.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_U8_R8, "conv.u8.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I1_I4, "conv.ovf.i1.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_I8, "conv.ovf.i1.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_R4, "conv.ovf.i1.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_R8, "conv.ovf.i1.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I1_U4, "conv.ovf.i1.u4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_U8, "conv.ovf.i1.u8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_UN_R4, "conv.ovf.i1.un.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I1_UN_R8, "conv.ovf.i1.un.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_U1_I4, "conv.ovf.u1.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U1_I8, "conv.ovf.u1.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U1_R4, "conv.ovf.u1.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U1_R8, "conv.ovf.u1.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I2_I4, "conv.ovf.i2.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_I8, "conv.ovf.i2.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_R4, "conv.ovf.i2.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_R8, "conv.ovf.i2.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I2_U4, "conv.ovf.i2.u4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_U8, "conv.ovf.i2.u8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_UN_R4, "conv.ovf.i2.un.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I2_UN_R8, "conv.ovf.i2.un.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_U2_I4, "conv.ovf.u2.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U2_I8, "conv.ovf.u2.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U2_R4, "conv.ovf.u2.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U2_R8, "conv.ovf.u2.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I4_U4, "conv.ovf.i4.u4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I4_I8, "conv.ovf.i4.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I4_U8, "conv.ovf.i4.u8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I4_R4, "conv.ovf.i4.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I4_R8, "conv.ovf.i4.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I4_UN_R8, "conv.ovf.i4.un.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_U4_I4, "conv.ovf.u4.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U4_I8, "conv.ovf.u4.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U4_R4, "conv.ovf.u4.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U4_R8, "conv.ovf.u4.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I8_U8, "conv.ovf.i8.u8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I8_R4, "conv.ovf.i8.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I8_R8, "conv.ovf.i8.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_I8_UN_R8, "conv.ovf.i8.un.r8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_I8_UN_R4, "conv.ovf.i8.un.r4", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CONV_OVF_U8_I4, "conv.ovf.u8.i4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U8_I8, "conv.ovf.u8.i8", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U8_R4, "conv.ovf.u8.r4", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CONV_OVF_U8_R8, "conv.ovf.u8.r8", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_CEQ0_I4, "ceq0.i4", 3, 1, 1, MintOpNoArgs) /* last unop; the removed locadd1/locsub1 and conv.*.sp entries have no counterpart among the added lines visible here */
 /* unops end */
 
-OPDEF(MINT_CKFINITE, "ckfinite", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_MKREFANY, "mkrefany", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_REFANYTYPE, "refanytype", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_REFANYVAL, "refanyval", 2, Pop1, Push1, MintOpNoArgs)
+OPDEF(MINT_CKFINITE, "ckfinite", 3, 1, 1, MintOpNoArgs) /* each length in this group grows by 2 over the removed form (1 -> 3, 2 -> 4) */
+OPDEF(MINT_MKREFANY, "mkrefany", 4, 1, 1, MintOpClassToken)
+OPDEF(MINT_REFANYTYPE, "refanytype", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_REFANYVAL, "refanyval", 4, 1, 1, MintOpNoArgs) /* NOTE(review): one slot longer than the other 1, 1 MintOpNoArgs entries here (as it was before this change); the optype may not describe the extra operand - confirm */
 
-OPDEF(MINT_CKNULL_N, "cknull_n", 2, Pop0, Push0, MintOpUShortInt)
+OPDEF(MINT_CKNULL, "cknull", 3, 1, 1, MintOpNoArgs) /* takes the place of cknull_n: 1, 1 with MintOpNoArgs instead of Pop0/Push0 with a MintOpUShortInt operand */
 
-OPDEF(MINT_GETCHR, "getchr", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_STRLEN, "strlen", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ARRAY_RANK, "array_rank", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ARRAY_IS_PRIMITIVE, "array_is_primitive", 1, Pop1, Push1, MintOpNoArgs)
+OPDEF(MINT_GETCHR, "getchr", 4, 1, 2, MintOpNoArgs) /* same 4, 1, 2 shape as the binops; the four entries below use the unop shape 3, 1, 1 */
+OPDEF(MINT_STRLEN, "strlen", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ARRAY_RANK, "array_rank", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ARRAY_IS_PRIMITIVE, "array_is_primitive", 3, 1, 1, MintOpNoArgs)
 
 /* Calls */
-OPDEF(MINT_CALL, "call", 3, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_CALLVIRT, "callvirt", 3, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 4, VarPop, Push1, MintOpMethodToken)
-OPDEF(MINT_CALL_DELEGATE, "call.delegate", 3, VarPop, VarPush, MintOpTwoShorts)
-OPDEF(MINT_CALLI, "calli", 3, VarPop, VarPush, MintOpMethodToken)
-OPDEF(MINT_CALLI_NAT, "calli.nat", 7, VarPop, VarPush, MintOpMethodToken)
-OPDEF(MINT_CALLI_NAT_DYNAMIC, "calli.nat.dynamic", 3, VarPop, VarPush, MintOpMethodToken)
-OPDEF(MINT_CALLI_NAT_FAST, "calli.nat.fast", 4, VarPop, VarPush, MintOpMethodToken)
-OPDEF(MINT_CALL_VARARG, "call.vararg", 4, VarPop, VarPush, MintOpMethodToken)
-OPDEF(MINT_CALLRUN, "callrun", 3, VarPop, VarPush, MintOpNoArgs)
-
-OPDEF(MINT_ICALL_V_V, "mono_icall_v_v", 2, Pop0, Push0, MintOpClassToken) /* not really */
-OPDEF(MINT_ICALL_V_P, "mono_icall_v_p", 2, Pop0, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_P_V, "mono_icall_p_v", 2, Pop1, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_P_P, "mono_icall_p_p", 2, Pop1, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_PP_V, "mono_icall_pp_v", 2, Pop2, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_PP_P, "mono_icall_pp_p", 2, Pop2, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_PPP_V, "mono_icall_ppp_v", 2, Pop3, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_PPP_P, "mono_icall_ppp_p", 2, Pop3, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPP_V, "mono_icall_pppp_v", 2, Pop4, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPP_P, "mono_icall_pppp_p", 2, Pop4, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPPP_V, "mono_icall_ppppp_v", 2, Pop5, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPPP_P, "mono_icall_ppppp_p", 2, Pop5, Push1, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPPPP_V, "mono_icall_pppppp_v", 2, Pop6, Push0, MintOpClassToken)
-OPDEF(MINT_ICALL_PPPPPP_P, "mono_icall_pppppp_p", 2, Pop6, Push1, MintOpClassToken)
+OPDEF(MINT_CALL, "call", 3, CallArgs, 0, MintOpMethodToken) /* call family, new form: CallArgs in field four where the removed form had VarPop; CallArgs itself is defined outside this hunk */
+OPDEF(MINT_CALLVIRT, "callvirt", 3, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 4, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_CALL_DELEGATE, "call.delegate", 4, CallArgs, 0, MintOpTwoShorts)
+OPDEF(MINT_CALLI, "calli", 4, CallArgs, 1, MintOpMethodToken) /* calli, calli.nat and calli.nat.dynamic are the only call entries here with 1 in field five */
+OPDEF(MINT_CALLI_NAT, "calli.nat", 7, CallArgs, 1, MintOpMethodToken)
+OPDEF(MINT_CALLI_NAT_DYNAMIC, "calli.nat.dynamic", 4, CallArgs, 1, MintOpMethodToken)
+OPDEF(MINT_CALLI_NAT_FAST, "calli.nat.fast", 5, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_CALL_VARARG, "call.vararg", 5, CallArgs, 0, MintOpMethodToken)
+OPDEF(MINT_CALLRUN, "callrun", 4, CallArgs, 0, MintOpNoArgs)
+
+OPDEF(MINT_ICALL_V_V, "mono_icall_v_v", 3, CallArgs, 0, MintOpShortInt) /* icall entries: length 2 -> 3 and MintOpClassToken -> MintOpShortInt; all use CallArgs, 0 regardless of the _V/_P suffix or the argument count in the name */
+OPDEF(MINT_ICALL_V_P, "mono_icall_v_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_P_V, "mono_icall_p_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_P_P, "mono_icall_p_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PP_V, "mono_icall_pp_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PP_P, "mono_icall_pp_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPP_V, "mono_icall_ppp_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPP_P, "mono_icall_ppp_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPP_V, "mono_icall_pppp_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPP_P, "mono_icall_pppp_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPPP_V, "mono_icall_ppppp_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPPP_P, "mono_icall_ppppp_p", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPPPP_V, "mono_icall_pppppp_v", 3, CallArgs, 0, MintOpShortInt)
+OPDEF(MINT_ICALL_PPPPPP_P, "mono_icall_pppppp_p", 3, CallArgs, 0, MintOpShortInt)
 // FIXME: MintOp
-OPDEF(MINT_JIT_CALL, "mono_jit_call", 3, VarPop, VarPush, MintOpNoArgs)
-OPDEF(MINT_JIT_CALL2, "mono_jit_call2", 5, VarPop, VarPush, MintOpNoArgs)
-
-OPDEF(MINT_MONO_LDPTR, "mono_ldptr", 2, Pop0, Push1, MintOpClassToken)
-OPDEF(MINT_MONO_SGEN_THREAD_INFO, "mono_sgen_thread_info", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_MONO_NEWOBJ, "mono_newobj", 2, Pop0, Push1, MintOpClassToken)
-OPDEF(MINT_MONO_RETOBJ, "mono_retobj", 1, Pop1, Push0, MintOpNoArgs)
-OPDEF(MINT_MONO_ATOMIC_STORE_I4, "mono_atomic.store.i4", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_MONO_MEMORY_BARRIER, "mono_memory_barrier", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_MONO_EXCHANGE_I8, "mono_interlocked.xchg.i8", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_MONO_LDDOMAIN, "mono_lddomain", 1, Pop0, Push1, MintOpNoArgs)
-OPDEF(MINT_MONO_GET_SP, "mono_get_sp", 1, Pop0, Push1, MintOpNoArgs)
-
-OPDEF(MINT_SDB_INTR_LOC, "sdb_intr_loc", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_SDB_SEQ_POINT, "sdb_seq_point", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_SDB_BREAKPOINT, "sdb_breakpoint", 1, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_LD_DELEGATE_METHOD_PTR, "ld_delegate_method_ptr", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_START_ABORT_PROT, "start_abort_protected", 1, Pop0, Push0, MintOpNoArgs)
+OPDEF(MINT_JIT_CALL, "mono_jit_call", 3, CallArgs, 0, MintOpNoArgs) // jit calls use the same CallArgs marker as the other call opcodes
+OPDEF(MINT_JIT_CALL2, "mono_jit_call2", 6, CallArgs, 0, MintOpNoArgs)
+
+OPDEF(MINT_MONO_LDPTR, "mono_ldptr", 3, 1, 0, MintOpShortInt) // 1 dreg, 0 sregs: produces a value without reading any source
+OPDEF(MINT_MONO_SGEN_THREAD_INFO, "mono_sgen_thread_info", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_MONO_NEWOBJ, "mono_newobj", 3, 1, 0, MintOpClassToken)
+OPDEF(MINT_MONO_RETOBJ, "mono_retobj", 2, 0, 1, MintOpNoArgs) // 0 dregs, 1 sreg: consumes a value and produces none
+OPDEF(MINT_MONO_ATOMIC_STORE_I4, "mono_atomic.store.i4", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_MONO_MEMORY_BARRIER, "mono_memory_barrier", 1, 0, 0, MintOpNoArgs) // no register operands, so the length stays 1
+OPDEF(MINT_MONO_EXCHANGE_I8, "mono_interlocked.xchg.i8", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_MONO_LDDOMAIN, "mono_lddomain", 2, 1, 0, MintOpNoArgs)
+OPDEF(MINT_MONO_GET_SP, "mono_get_sp", 2, 1, 0, MintOpNoArgs)
+
+OPDEF(MINT_SDB_INTR_LOC, "sdb_intr_loc", 1, 0, 0, MintOpNoArgs) // sdb opcodes take no register operands
+OPDEF(MINT_SDB_SEQ_POINT, "sdb_seq_point", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_SDB_BREAKPOINT, "sdb_breakpoint", 1, 0, 0, MintOpNoArgs)
+OPDEF(MINT_LD_DELEGATE_METHOD_PTR, "ld_delegate_method_ptr", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_START_ABORT_PROT, "start_abort_protected", 1, 0, 0, MintOpNoArgs)
 
 // Math intrinsics
 // double
-OPDEF(MINT_ABS, "abs", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ASIN, "asin", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ASINH, "asinh", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ACOS, "acos", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ACOSH, "acosh", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATAN, "atan", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATANH, "atanh", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATAN2, "atan2", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CEILING, "ceiling", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_COS, "cos", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CBRT, "cbrt", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_COSH, "cosh", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_EXP, "exp", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_FMA, "fma", 1, Pop3, Push1, MintOpNoArgs)
-OPDEF(MINT_FLOOR, "floor", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ILOGB, "ilogb", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOG, "log", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOG2, "log2", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOG10, "log10", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_POW, "pow", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SCALEB, "scaleb", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SIN, "sin", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SQRT, "sqrt", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SINH, "sinh", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_TAN, "tan", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_TANH, "tanh", 1, Pop1, Push1, MintOpNoArgs)
+OPDEF(MINT_ABS, "abs", 3, 1, 1, MintOpNoArgs) // one-source math ops: 1 dreg, 1 sreg, length 3
+OPDEF(MINT_ASIN, "asin", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ASINH, "asinh", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ACOS, "acos", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ACOSH, "acosh", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATAN, "atan", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATANH, "atanh", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATAN2, "atan2", 4, 1, 2, MintOpNoArgs) // two-source ops (atan2, pow, scaleb): 2 sregs, length 4
+OPDEF(MINT_CEILING, "ceiling", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_COS, "cos", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CBRT, "cbrt", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_COSH, "cosh", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_EXP, "exp", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_FMA, "fma", 5, 1, 3, MintOpNoArgs) // three-source op: 3 sregs, length 5
+OPDEF(MINT_FLOOR, "floor", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ILOGB, "ilogb", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOG, "log", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOG2, "log2", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOG10, "log10", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_POW, "pow", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SCALEB, "scaleb", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SIN, "sin", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SQRT, "sqrt", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SINH, "sinh", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_TAN, "tan", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_TANH, "tanh", 3, 1, 1, MintOpNoArgs)
 
 // float. These must be kept in the same order as their double counterpart
-OPDEF(MINT_ABSF, "absf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ASINF, "asinf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ASINHF, "asinhf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ACOSF, "acosf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ACOSHF, "acoshf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATANF, "atanf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATANHF, "atanhf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ATAN2F, "atan2f", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_CEILINGF, "ceilingf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_COSF, "cosf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_CBRTF, "cbrtf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_COSHF, "coshf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_EXPF, "expf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_FMAF, "fmaf", 1, Pop3, Push1, MintOpNoArgs)
-OPDEF(MINT_FLOORF, "floorf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_ILOGBF, "ilogbf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOGF, "logf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOG2F, "log2f", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_LOG10F, "log10f", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_POWF, "powf", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SCALEBF, "scalebf", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_SINF, "sinf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SQRTF, "sqrtf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_SINHF, "sinhf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_TANF, "tanf", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_TANHF, "tanhf", 1, Pop1, Push1, MintOpNoArgs)
-
-OPDEF(MINT_PROF_ENTER, "prof_enter", 2, Pop0, Push0, MintOpNoArgs)
-OPDEF(MINT_PROF_EXIT, "prof_exit", 4, Pop1, Push0, MintOpShortAndInt)
-OPDEF(MINT_PROF_EXIT_VOID, "prof_exit_void", 4, Pop0, Push0, MintOpShortAndInt)
-OPDEF(MINT_PROF_COVERAGE_STORE, "prof_coverage_store", 5, Pop0, Push0, MintOpLongInt)
-
-OPDEF(MINT_INTRINS_ENUM_HASFLAG, "intrins_enum_hasflag", 2, Pop2, Push1, MintOpClassToken)
-OPDEF(MINT_INTRINS_GET_HASHCODE, "intrins_get_hashcode", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_GET_TYPE, "intrins_get_type", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_SPAN_CTOR, "intrins_span_ctor", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_BYREFERENCE_GET_VALUE, "intrins_byreference_get_value", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_UNSAFE_ADD_BYTE_OFFSET, "intrins_unsafe_add_byte_offset", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_UNSAFE_BYTE_OFFSET, "intrins_unsafe_byte_offset", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, "intrins_runtimehelpers_object_has_component_size", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_CLEAR_WITH_REFERENCES, "intrin_clear_with_references", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 1, Pop2, Push0, MintOpNoArgs)
-OPDEF(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE, "intrins_ascii_chars_to_uppercase", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF, "intrins_memorymarshal_getarraydataref", 1, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, "intrins_ordinal_ignore_case_ascii", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_64ORDINAL_IGNORE_CASE_ASCII, "intrins_64ordinal_ignore_case_ascii", 1, Pop2, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_U32_TO_DECSTR, "intrins_u32_to_decstr", 3, Pop1, Push1, MintOpNoArgs)
-OPDEF(MINT_INTRINS_WIDEN_ASCII_TO_UTF16, "intrins_widen_ascii_to_utf16", 1, Pop3, Push1, MintOpNoArgs)
+OPDEF(MINT_ABSF, "absf", 3, 1, 1, MintOpNoArgs) // float variants: same lengths and dreg/sreg counts, row for row, as the double table
+OPDEF(MINT_ASINF, "asinf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ASINHF, "asinhf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ACOSF, "acosf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ACOSHF, "acoshf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATANF, "atanf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATANHF, "atanhf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ATAN2F, "atan2f", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_CEILINGF, "ceilingf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_COSF, "cosf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_CBRTF, "cbrtf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_COSHF, "coshf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_EXPF, "expf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_FMAF, "fmaf", 5, 1, 3, MintOpNoArgs)
+OPDEF(MINT_FLOORF, "floorf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_ILOGBF, "ilogbf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOGF, "logf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOG2F, "log2f", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_LOG10F, "log10f", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_POWF, "powf", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SCALEBF, "scalebf", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_SINF, "sinf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SQRTF, "sqrtf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_SINHF, "sinhf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_TANF, "tanf", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_TANHF, "tanhf", 3, 1, 1, MintOpNoArgs)
+
+OPDEF(MINT_PROF_ENTER, "prof_enter", 2, 0, 0, MintOpShortInt) // profiler opcodes: none has a dreg
+OPDEF(MINT_PROF_EXIT, "prof_exit", 5, 0, 1, MintOpShortAndInt) // reads one sreg
+OPDEF(MINT_PROF_COVERAGE_STORE, "prof_coverage_store", 5, 0, 0, MintOpLongInt)
+
+OPDEF(MINT_INTRINS_ENUM_HASFLAG, "intrins_enum_hasflag", 5, 1, 2, MintOpClassToken) // intrinsics: dreg/sreg counts below mirror each intrinsic's result/argument count
+OPDEF(MINT_INTRINS_GET_HASHCODE, "intrins_get_hashcode", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_GET_TYPE, "intrins_get_type", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_SPAN_CTOR, "intrins_span_ctor", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_BYREFERENCE_GET_VALUE, "intrins_byreference_get_value", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_UNSAFE_ADD_BYTE_OFFSET, "intrins_unsafe_add_byte_offset", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_UNSAFE_BYTE_OFFSET, "intrins_unsafe_byte_offset", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, "intrins_runtimehelpers_object_has_component_size", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_CLEAR_WITH_REFERENCES, "intrin_clear_with_references", 3, 0, 2, MintOpNoArgs) // no dreg: two sources only
+OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 3, 0, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE, "intrins_ascii_chars_to_uppercase", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF, "intrins_memorymarshal_getarraydataref", 3, 1, 1, MintOpNoArgs)
+OPDEF(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, "intrins_ordinal_ignore_case_ascii", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_64ORDINAL_IGNORE_CASE_ASCII, "intrins_64ordinal_ignore_case_ascii", 4, 1, 2, MintOpNoArgs)
+OPDEF(MINT_INTRINS_U32_TO_DECSTR, "intrins_u32_to_decstr", 5, 1, 1, MintOpTwoShorts)
+OPDEF(MINT_INTRINS_WIDEN_ASCII_TO_UTF16, "intrins_widen_ascii_to_utf16", 5, 1, 3, MintOpNoArgs)
index fe264b5..262062e 100644 (file)
@@ -52,12 +52,9 @@ typedef enum {
 #define READ64(x) (*(guint64 *)(x))
 #endif
 
-#define MINT_SWITCH_LEN(n) (3 + (n) * 2)
+#define MINT_SWITCH_LEN(n) (4 + (n) * 2)
 
-#define MINT_IS_LDLOC(op) ((op) >= MINT_LDLOC_I1 && (op) <= MINT_LDLOC_VT)
-#define MINT_IS_STLOC(op) ((op) >= MINT_STLOC_I1 && (op) <= MINT_STLOC_VT)
-#define MINT_IS_MOVLOC(op) ((op) >= MINT_MOVLOC_1 && (op) <= MINT_MOVLOC_VT)
-#define MINT_IS_STLOC_NP(op) ((op) >= MINT_STLOC_NP_I4 && (op) <= MINT_STLOC_NP_O)
+#define MINT_IS_MOV(op) ((op) >= MINT_MOV_I1 && (op) <= MINT_MOV_VT) // true for any opcode in MINT_MOV_I1..MINT_MOV_VT; relies on the mov opcodes being contiguous in the opcode enum
 #define MINT_IS_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BLT_UN_R8_S)
 #define MINT_IS_UNOP_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BRTRUE_R8_S)
 #define MINT_IS_BINOP_CONDITIONAL_BRANCH(op) ((op) >= MINT_BEQ_I4 && (op) <= MINT_BLT_UN_R8_S)
@@ -65,23 +62,17 @@ typedef enum {
 #define MINT_IS_PATCHABLE_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_VCALL)
 #define MINT_IS_NEWOBJ(op) ((op) >= MINT_NEWOBJ && (op) <= MINT_NEWOBJ_MAGIC)
 #define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_M1 && (op) <= MINT_LDC_I4)
-#define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CONV_R8_R4_SP)
+#define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CEQ0_I4)
 #define MINT_IS_BINOP(op) ((op) >= MINT_ADD_I4 && (op) <= MINT_CLT_UN_R8)
-#define MINT_IS_LDLOCFLD(op) ((op) >= MINT_LDLOCFLD_I1 && (op) <= MINT_LDLOCFLD_O)
-#define MINT_IS_STLOCFLD(op) ((op) >= MINT_STLOCFLD_I1 && (op) <= MINT_STLOCFLD_O)
-#define MINT_IS_LOCUNOP(op) ((op) >= MINT_LOCADD1_I4 && (op) <= MINT_LOCSUB1_I8)
 #define MINT_IS_LDFLD(op) ((op) >= MINT_LDFLD_I1 && (op) <= MINT_LDFLD_O)
+#define MINT_IS_STFLD(op) ((op) >= MINT_STFLD_I1 && (op) <= MINT_STFLD_O)
 
-
-#define MINT_POP_ALL   -2
-#define MINT_VAR_PUSH  -1
-#define MINT_VAR_POP   -1
+#define MINT_CALL_ARGS 2 // NOTE(review): presumably the value behind the CallArgs marker used in the dregs column of mintops.def -- confirm in mintops.c
 
 extern unsigned char const mono_interp_oplen[];
-extern int const mono_interp_oppop[];
-extern int const mono_interp_oppush[];
+extern int const mono_interp_op_dregs [];
+extern int const mono_interp_op_sregs [];
 extern MintOpArgType const mono_interp_opargtype[];
-extern char* mono_interp_dis_mintop (gint32 ins_offset, gboolean native_offset, const guint16 *ip, guint16 opcode);
 extern const guint16* mono_interp_dis_mintop_len (const guint16 *ip);
 
 // This, instead of an array of pointers, to optimize away a pointer and a relocation per string.
index ae59aff..1994437 100644 (file)
@@ -129,6 +129,12 @@ MonoInterpStats mono_interp_stats;
 #define MINT_CONV_OVF_U4_P MINT_CONV_OVF_U4_I4
 #endif
 
+#if SIZEOF_VOID_P == 8 // MINT_MOV_P: alias for the mov opcode matching the pointer size
+#define MINT_MOV_P MINT_MOV_8 // 8-byte pointers
+#else
+#define MINT_MOV_P MINT_MOV_4 // any other pointer size falls back to the 4-byte mov
+#endif
+
 typedef struct {
        const gchar *op_name;
        guint16 insn [3];
@@ -137,7 +143,7 @@ typedef struct {
 // static const MagicIntrinsic int_binop[] = {
 
 static const MagicIntrinsic int_unnop[] = {
-       { "op_UnaryPlus", {MINT_NOP, MINT_NOP, MINT_NOP}},
+       { "op_UnaryPlus", {MINT_MOV_P, MINT_MOV_P, MINT_MOV_4}},
        { "op_UnaryNegation", {MINT_NEG_P, MINT_NEG_P, MINT_NEG_FP}},
        { "op_OnesComplement", {MINT_NOT_P, MINT_NOT_P, MINT_NIY}}
 };
@@ -181,6 +187,25 @@ static int stack_type [] = {
 
 static gboolean generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, MonoGenericContext *generic_context, MonoError *error);
 
+#define interp_ins_set_dreg(ins,dr) do { /* set the destination register (dreg) of instruction ins; arguments are parenthesized so any expression expands safely */ \
+       (ins)->dreg = (dr); \
+} while (0)
+
+#define interp_ins_set_sreg(ins,s1) do { /* set the single source register of ins */ \
+       (ins)->sregs [0] = (s1); \
+} while (0)
+
+#define interp_ins_set_sregs2(ins,s1,s2) do { /* set both source registers; ins is expanded twice, so it must be free of side effects */ \
+       (ins)->sregs [0] = (s1); \
+       (ins)->sregs [1] = (s2); \
+} while (0)
+
+#define interp_ins_set_sregs3(ins,s1,s2,s3) do { /* set all three source registers; ins is expanded three times, so it must be free of side effects */ \
+       (ins)->sregs [0] = (s1); \
+       (ins)->sregs [1] = (s2); \
+       (ins)->sregs [2] = (s3); \
+} while (0)
+
 static InterpInst*
 interp_new_ins (TransformData *td, guint16 opcode, int len)
 {
@@ -274,8 +299,10 @@ interp_prev_ins (InterpInst *ins)
 
 #define ENSURE_I4(td, sp_off) \
        do { \
-               if ((td)->sp [-sp_off].type == STACK_TYPE_I8) \
-                       interp_add_ins (td, sp_off == 1 ? MINT_CONV_I4_I8 : MINT_CONV_I4_I8_SP); \
+               if ((td)->sp [-sp_off].type == STACK_TYPE_I8) { \
+                       /* Same representation in memory, nothing to do */ \
+                       (td)->sp [-sp_off].type = STACK_TYPE_I4; \
+               } \
        } while (0)
 
 #define CHECK_TYPELOAD(klass) \
@@ -338,26 +365,6 @@ interp_prev_ins (InterpInst *ins)
 
 #endif
 
-// This does not handle the size/offset of the entry. For those cases
-// we need to manually pop the top of the stack and push a new entry.
-#define SET_SIMPLE_TYPE(s, ty) \
-       do { \
-               g_assert (ty != STACK_TYPE_VT); \
-               g_assert ((s)->type != STACK_TYPE_VT); \
-               (s)->type = (ty); \
-               (s)->flags = 0; \
-               (s)->klass = NULL; \
-       } while (0)
-
-#define SET_TYPE(s, ty, k) \
-       do { \
-               g_assert (ty != STACK_TYPE_VT); \
-               g_assert ((s)->type != STACK_TYPE_VT); \
-               (s)->type = (ty); \
-               (s)->flags = 0; \
-               (s)->klass = k; \
-       } while (0)
-
 static void
 realloc_stack (TransformData *td)
 {
@@ -377,6 +384,59 @@ get_tos_offset (TransformData *td)
                return td->sp [-1].offset + td->sp [-1].size;
 }
 
+static MonoType* // Maps an evaluation-stack type (STACK_TYPE_*) to the MonoType used when creating a local for that slot.
+get_type_from_stack (int type, MonoClass *klass) // klass is only consulted for STACK_TYPE_O and STACK_TYPE_VT
+{
+       switch (type) {
+               case STACK_TYPE_I4: return m_class_get_byval_arg (mono_defaults.int32_class);
+               case STACK_TYPE_I8: return m_class_get_byval_arg (mono_defaults.int64_class);
+               case STACK_TYPE_R4: return m_class_get_byval_arg (mono_defaults.single_class);
+               case STACK_TYPE_R8: return m_class_get_byval_arg (mono_defaults.double_class);
+               case STACK_TYPE_O: return (klass && !m_class_is_valuetype (klass)) ? m_class_get_byval_arg (klass) : m_class_get_byval_arg (mono_defaults.object_class); // falls back to mono_defaults.object_class when klass is NULL or a valuetype
+               case STACK_TYPE_VT: return m_class_get_byval_arg (klass); // no NULL check: callers must supply klass for valuetypes
+               case STACK_TYPE_MP:
+               case STACK_TYPE_F:
+                       return m_class_get_byval_arg (mono_defaults.int_class); // both are typed as mono_defaults.int_class
+               default:
+                       g_assert_not_reached (); // any other stack type is unexpected here
+       }
+}
+
+/*
+ * These are additional locals that can be allocated as we transform the code.
+ * They are allocated past the method locals so they are accessed in the same
+ * way, with an offset relative to the frame->locals. This function appends a new InterpLocal of the given type and size to td->locals and returns its index; the new entry starts with flags 0, indirects 0 and offset -1 (presumably meaning "not assigned yet" -- TODO confirm where offsets are allocated).
+ */
+static int
+create_interp_local_explicit (TransformData *td, MonoType *type, int size)
+{
+       if (td->locals_size == td->locals_capacity) { // array is full: grow it before appending
+               td->locals_capacity *= 2;
+               if (td->locals_capacity == 0) // doubling an empty array yields 0, so start with 2 entries
+                       td->locals_capacity = 2;
+               td->locals = (InterpLocal*) g_realloc (td->locals, td->locals_capacity * sizeof (InterpLocal));
+       }
+       td->locals [td->locals_size].type = type;
+       td->locals [td->locals_size].mt = mint_type (type); // cache the mint type derived from the MonoType
+       td->locals [td->locals_size].flags = 0;
+       td->locals [td->locals_size].indirects = 0;
+       td->locals [td->locals_size].offset = -1;
+       td->locals [td->locals_size].size = size;
+       td->locals_size++;
+       return td->locals_size - 1; // index of the entry that was just appended
+
+}
+
+static int // Creates a local that backs an execution-stack slot and returns its index in td->locals.
+create_interp_stack_local (TransformData *td, int type, MonoClass *k, int type_size, int offset) // type/k: stack type and class of the slot; offset: stack offset recorded on the local
+{
+       int local = create_interp_local_explicit (td, get_type_from_stack (type, k), type_size);
+
+       td->locals [local].flags |= INTERP_LOCAL_FLAG_EXECUTION_STACK; // mark the local as mapping to the execution stack
+       td->locals [local].stack_offset = offset;
+       return local;
+}
+
 static void
 push_type_explicit (TransformData *td, int type, MonoClass *k, int type_size)
 {
@@ -390,15 +450,50 @@ push_type_explicit (TransformData *td, int type, MonoClass *k, int type_size)
        td->sp->klass = k;
        td->sp->flags = 0;
        td->sp->offset = get_tos_offset (td);
+       td->sp->local = create_interp_stack_local (td, type, k, type_size, td->sp->offset);
        td->sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE);
        if ((td->sp->size + td->sp->offset) > td->max_stack_size)
                td->max_stack_size = td->sp->size + td->sp->offset;
        td->sp++;
 }
 
+// Retype a non-valuetype stack entry in place, resetting its flags and klass; asserts that neither the old nor the new type is STACK_TYPE_VT.
+// This does not handle the size/offset of the entry. For those cases we need to manually pop the top of the stack and push a new entry.
+#define SET_SIMPLE_TYPE(s, ty) \
+       do { \
+               g_assert (ty != STACK_TYPE_VT); \
+               g_assert ((s)->type != STACK_TYPE_VT); \
+               (s)->type = (ty); \
+               (s)->flags = 0; \
+               (s)->klass = NULL; \
+       } while (0)
+
+#define SET_TYPE(s, ty, k) /* same as SET_SIMPLE_TYPE, but records k as the entry's klass instead of NULL */ \
+       do { \
+               g_assert (ty != STACK_TYPE_VT); \
+               g_assert ((s)->type != STACK_TYPE_VT); \
+               (s)->type = (ty); \
+               (s)->flags = 0; \
+               (s)->klass = k; \
+       } while (0)
+
+static void // Retypes stack entry sp to (type, klass) and binds it to a fresh execution-stack local at the same stack offset.
+set_type_and_local (TransformData *td, StackInfo *sp, MonoClass *klass, int type)
+{
+       SET_TYPE (sp, type, klass); // asserts that neither the old nor the new type is STACK_TYPE_VT
+       sp->local = create_interp_stack_local (td, type, klass, MINT_STACK_SLOT_SIZE, sp->offset); // forward klass so the local's MonoType agrees with sp->klass instead of always using the NULL-class fallback
+}
+
+static void // Convenience wrapper around set_type_and_local for entries that carry no class (klass == NULL).
+set_simple_type_and_local (TransformData *td, StackInfo *sp, int type)
+{
+       set_type_and_local (td, sp, NULL, type);
+}
+
 static void
 push_type (TransformData *td, int type, MonoClass *k)
 {
+       // We don't really care about the exact size for non-valuetypes, so the entry is always pushed with MINT_STACK_SLOT_SIZE
        push_type_explicit (td, type, k, MINT_STACK_SLOT_SIZE);
 }
 
@@ -458,12 +553,15 @@ interp_merge_bblocks (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock
 
        // Remove the branch instruction to the invalid bblock
        if (bb->last_ins) {
-               if (bb->last_ins->opcode == MINT_BR || bb->last_ins->opcode == MINT_BR_S) {
-                       g_assert (bb->last_ins->info.target_bb == bbadd);
-                       interp_clear_ins (bb->last_ins);
-               } else if (bb->last_ins->opcode == MINT_SWITCH) {
-                       // Weird corner case where empty switch can branch by default to next instruction
-                       bb->last_ins->opcode = MINT_POP;
+               InterpInst *last_ins = (bb->last_ins->opcode != MINT_NOP) ? bb->last_ins : interp_prev_ins (bb->last_ins);
+               if (last_ins) {
+                       if (last_ins->opcode == MINT_BR || last_ins->opcode == MINT_BR_S) {
+                               g_assert (last_ins->info.target_bb == bbadd);
+                               interp_clear_ins (last_ins);
+                       } else if (last_ins->opcode == MINT_SWITCH) {
+                               // Weird corner case where empty switch can branch by default to next instruction
+                               last_ins->opcode = MINT_NOP;
+                       }
                }
        }
 
@@ -564,10 +662,70 @@ interp_link_bblocks (TransformData *td, InterpBasicBlock *from, InterpBasicBlock
        }
 }
 
+static int // Returns the MINT_MOV_* opcode used to copy a value of mint type mt.
+get_mov_for_type (int mt, gboolean needs_sext) // needs_sext only matters for the 1- and 2-byte types
+{
+       switch (mt) {
+       case MINT_TYPE_I1:
+       case MINT_TYPE_U1:
+       case MINT_TYPE_I2:
+       case MINT_TYPE_U2:
+               if (needs_sext)
+                       return MINT_MOV_I1 + mt; // NOTE(review): assumes MINT_TYPE_I1 is 0 and the MINT_MOV_I1.. opcodes follow the MINT_TYPE_* order -- confirm in mintops.def
+               else
+                       return MINT_MOV_4; // no extension requested: use the generic 4-byte mov
+       case MINT_TYPE_I4:
+       case MINT_TYPE_R4:
+               return MINT_MOV_4;
+       case MINT_TYPE_I8:
+       case MINT_TYPE_R8:
+               return MINT_MOV_8;
+       case MINT_TYPE_O: // object references use the mov matching the pointer size
+#if SIZEOF_VOID_P == 8
+               return MINT_MOV_8;
+#else
+               return MINT_MOV_4;
+#endif
+       case MINT_TYPE_VT:
+               return MINT_MOV_VT;
+       }
+       g_assert_not_reached (); // reached only for an mt value not handled above
+}
+
+// Should be called when td->cbb branches to newbb and newbb can have a stack state. For every stack slot whose current local differs from the one recorded in newbb->stack_state, emits a mov from the current local into newbb's local.
+static void
+fixup_newbb_stack_locals (TransformData *td, InterpBasicBlock *newbb)
+{
+       if (newbb->stack_height <= 0) // nothing to reconcile: the recorded stack is empty, or negative (init_bb_stack_state treats a negative height as not initialized)
+               return;
+
+       for (int i = 0; i < newbb->stack_height; i++) {
+               int sloc = td->stack [i].local;
+               int dloc = newbb->stack_state [i].local;
+               if (sloc != dloc) {
+                       int mt = td->locals [sloc].mt; // the mov opcode is chosen from the source local's type only
+                       int mov_op = get_mov_for_type (mt, FALSE);
+
+                       // FIXME The assert below can be hit in some IL cases. Should we merge the stack states ? (b41002.il)
+                       // g_assert (mov_op == get_mov_for_type (td->locals [dloc].mt, FALSE));
+
+                       interp_add_ins (td, mov_op);
+                       interp_ins_set_sreg (td->last_ins, td->stack [i].local);
+                       interp_ins_set_dreg (td->last_ins, newbb->stack_state [i].local);
+
+                       if (mt == MINT_TYPE_VT) {
+                               g_assert (td->locals [sloc].size == td->locals [dloc].size);
+                               td->last_ins->data [0] = td->locals [sloc].size; // valuetype movs carry the size to copy in data [0]
+                       }
+               }
+       }
+}
+
 // Initializes stack state at entry to bb, based on the current stack state
 static void
 init_bb_stack_state (TransformData *td, InterpBasicBlock *bb)
 {
+       // FIXME If already initialized, then we need to generate mov to the registers in the state.
        // Check if already initialized
        if (bb->stack_height >= 0)
                return;
@@ -601,6 +759,7 @@ handle_branch (TransformData *td, int short_op, int long_op, int offset)
        if (short_op == MINT_LEAVE_S || short_op == MINT_LEAVE_S_CHECK)
                target_bb->eh_block = TRUE;
 
+       fixup_newbb_stack_locals (td, target_bb);
        if (offset > 0)
                init_bb_stack_state (td, target_bb);
 
@@ -626,10 +785,26 @@ one_arg_branch(TransformData *td, int mint_op, int offset, int inst_size)
        int short_op = long_op + MINT_BRFALSE_I4_S - MINT_BRFALSE_I4;
        CHECK_STACK(td, 1);
        --td->sp;
-       if (offset)
+       if (offset) {
                handle_branch (td, short_op, long_op, offset + inst_size);
+               interp_ins_set_sreg (td->last_ins, td->sp->local);
+       } else {
+               interp_add_ins (td, MINT_NOP);
+       }
+}
+
+static void // Emits conversion opcode conv_op on stack entry sp: reads sp's current local, retypes sp to `type` with a fresh local, and writes the result there.
+interp_add_conv (TransformData *td, StackInfo *sp, InterpInst *prev_ins, int type, int conv_op)
+{
+       InterpInst *new_inst;
+       if (prev_ins) // non-NULL prev_ins: place the conversion relative to it via interp_insert_ins
+               new_inst = interp_insert_ins (td, prev_ins, conv_op);
         else
-               interp_add_ins (td, MINT_POP);
+               new_inst = interp_add_ins (td, conv_op); // NULL prev_ins: add the conversion with interp_add_ins
+
+       interp_ins_set_sreg (new_inst, sp->local); // source is the local sp had before the retype
+       set_simple_type_and_local (td, sp, type); // assigns sp a new local of the target type
+       interp_ins_set_dreg (new_inst, sp->local); // destination is the newly created local
 }
 
 static void 
@@ -641,19 +816,15 @@ two_arg_branch(TransformData *td, int mint_op, int offset, int inst_size)
 
        if (type1 == STACK_TYPE_I4 && type2 == STACK_TYPE_I8) {
                // The il instruction starts with the actual branch, and not with the conversion opcodes
-               interp_insert_ins (td, td->last_ins, MINT_CONV_I8_I4);
-               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I8);
+               interp_add_conv (td, td->sp - 1, td->last_ins, STACK_TYPE_I8, MINT_CONV_I8_I4);
                type1 = STACK_TYPE_I8;
        } else if (type1 == STACK_TYPE_I8 && type2 == STACK_TYPE_I4) {
-               interp_insert_ins (td, td->last_ins, MINT_CONV_I8_I4_SP);
-               SET_SIMPLE_TYPE (td->sp - 2, STACK_TYPE_I8);
+               interp_add_conv (td, td->sp - 2, td->last_ins, STACK_TYPE_I8, MINT_CONV_I8_I4);
        } else if (type1 == STACK_TYPE_R4 && type2 == STACK_TYPE_R8) {
-               interp_insert_ins (td, td->last_ins, MINT_CONV_R8_R4);
-               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_R8);
+               interp_add_conv (td, td->sp - 1, td->last_ins, STACK_TYPE_R8, MINT_CONV_R8_R4);
                type1 = STACK_TYPE_R8;
        } else if (type1 == STACK_TYPE_R8 && type2 == STACK_TYPE_R4) {
-               interp_insert_ins (td, td->last_ins, MINT_CONV_R8_R4_SP);
-               SET_SIMPLE_TYPE (td->sp - 2, STACK_TYPE_R8);
+               interp_add_conv (td, td->sp - 2, td->last_ins, STACK_TYPE_R8, MINT_CONV_R8_R4);
        } else if (type1 != type2) {
                g_warning("%s.%s: branch type mismatch %d %d", 
                        m_class_get_name (td->method->klass), td->method->name,
@@ -665,9 +836,9 @@ two_arg_branch(TransformData *td, int mint_op, int offset, int inst_size)
        td->sp -= 2;
        if (offset) {
                handle_branch (td, short_op, long_op, offset + inst_size);
+               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
        } else {
-               interp_add_ins (td, MINT_POP);
-               interp_add_ins (td, MINT_POP);
+               interp_add_ins (td, MINT_NOP);
        }
 }
 
@@ -676,7 +847,11 @@ unary_arith_op(TransformData *td, int mint_op)
 {
        int op = mint_op + td->sp [-1].type - STACK_TYPE_I4;
        CHECK_STACK(td, 1);
+       td->sp--;
        interp_add_ins (td, op);
+       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+       push_simple_type (td, td->sp [0].type);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 }
 
 static void
@@ -687,23 +862,21 @@ binary_arith_op(TransformData *td, int mint_op)
        int op;
 #if SIZEOF_VOID_P == 8
        if ((type1 == STACK_TYPE_MP || type1 == STACK_TYPE_I8) && type2 == STACK_TYPE_I4) {
-               interp_add_ins (td, MINT_CONV_I8_I4);
+               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                type2 = STACK_TYPE_I8;
        }
        if (type1 == STACK_TYPE_I4 && (type2 == STACK_TYPE_MP || type2 == STACK_TYPE_I8)) {
-               interp_add_ins (td, MINT_CONV_I8_I4_SP);
+               interp_add_conv (td, td->sp - 2, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                type1 = STACK_TYPE_I8;
-               td->sp [-2].type = STACK_TYPE_I8;
        }
 #endif
        if (type1 == STACK_TYPE_R8 && type2 == STACK_TYPE_R4) {
-               interp_add_ins (td, MINT_CONV_R8_R4);
+               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                type2 = STACK_TYPE_R8;
        }
        if (type1 == STACK_TYPE_R4 && type2 == STACK_TYPE_R8) {
-               interp_add_ins (td, MINT_CONV_R8_R4_SP);
+               interp_add_conv (td, td->sp - 2, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                type1 = STACK_TYPE_R8;
-               td->sp [-2].type = STACK_TYPE_R8;
        }
        if (type1 == STACK_TYPE_MP)
                type1 = STACK_TYPE_I;
@@ -716,8 +889,11 @@ binary_arith_op(TransformData *td, int mint_op)
        }
        op = mint_op + type1 - STACK_TYPE_I4;
        CHECK_STACK(td, 2);
+       td->sp -= 2;
        interp_add_ins (td, op);
-       --td->sp;
+       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+       push_simple_type (td, type1);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 }
 
 static void
@@ -730,8 +906,11 @@ shift_op(TransformData *td, int mint_op)
                        m_class_get_name (td->method->klass), td->method->name,
                        td->sp [-2].type);
        }
+       td->sp -= 2;
        interp_add_ins (td, op);
-       --td->sp;
+       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+       push_simple_type (td, td->sp [0].type);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 }
 
 static int 
@@ -764,6 +943,7 @@ get_arg_type_exact (TransformData *td, int n, int *mt)
 static void 
 load_arg(TransformData *td, int n)
 {
+       gint32 size = 0;
        int mt;
        MonoClass *klass = NULL;
        MonoType *type;
@@ -772,7 +952,6 @@ load_arg(TransformData *td, int n)
        type = get_arg_type_exact (td, n, &mt);
 
        if (mt == MINT_TYPE_VT) {
-               gint32 size;
                klass = mono_class_from_mono_type_internal (type);
                if (mono_method_signature_internal (td->method)->pinvoke)
                        size = mono_class_native_size (klass, NULL);
@@ -781,14 +960,10 @@ load_arg(TransformData *td, int n)
 
                if (hasthis && n == 0) {
                        mt = MINT_TYPE_I;
-                       interp_add_ins (td, MINT_LDLOC_O);
-                       td->last_ins->data [0] = 0;
                        klass = NULL;
                        push_type (td, stack_type [mt], klass);
                } else {
-                       interp_add_ins (td, MINT_LDLOC_VT);
-                       td->last_ins->data [0] = n;
-                       WRITE32_INS (td->last_ins, 1, &size);
+                       g_assert (size < G_MAXUINT16);
                        push_type_vt (td, klass, size);
                }
        } else {
@@ -796,21 +971,23 @@ load_arg(TransformData *td, int n)
                        // Special case loading of the first ptr sized argument
                        if (mt != MINT_TYPE_O)
                                mt = MINT_TYPE_I;
-                       interp_add_ins (td, MINT_LDLOC_O);
-                       td->last_ins->data [0] = 0;
                } else {
-                       interp_add_ins (td, MINT_LDLOC_I1 + (mt - MINT_TYPE_I1));
-                       td->last_ins->data [0] = n;
                        if (mt == MINT_TYPE_O)
                                klass = mono_class_from_mono_type_internal (type);
                }
                push_type (td, stack_type [mt], klass);
        }
+       interp_add_ins (td, get_mov_for_type (mt, TRUE));
+       interp_ins_set_sreg (td->last_ins, n);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local); 
+       if (mt == MINT_TYPE_VT)
+               td->last_ins->data [0] = size;
 }
 
 static void 
 store_arg(TransformData *td, int n)
 {
+       gint32 size = 0;
        int mt;
        CHECK_STACK (td, 1);
        MonoType *type;
@@ -818,82 +995,66 @@ store_arg(TransformData *td, int n)
        type = get_arg_type_exact (td, n, &mt);
 
        if (mt == MINT_TYPE_VT) {
-               gint32 size;
                MonoClass *klass = mono_class_from_mono_type_internal (type);
                if (mono_method_signature_internal (td->method)->pinvoke)
                        size = mono_class_native_size (klass, NULL);
                else
                        size = mono_class_value_size (klass, NULL);
-               interp_add_ins (td, MINT_STLOC_VT);
-               td->last_ins->data [0] = n;
-               WRITE32_INS (td->last_ins, 1, &size);
-       } else {
-               interp_add_ins (td, MINT_STLOC_I1 + (mt - MINT_TYPE_I1));
-               td->last_ins->data [0] = n;
+               g_assert (size < G_MAXUINT16);
        }
        --td->sp;
+       interp_add_ins (td, get_mov_for_type (mt, FALSE));
+       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+       interp_ins_set_dreg (td->last_ins, n);
+       if (mt == MINT_TYPE_VT)
+               td->last_ins->data [0] = size;
 }
 
 static void
 load_local (TransformData *td, int local)
 {
-       MonoType *type = td->locals [local].type;
        int mt = td->locals [local].mt;
-       MonoClass *klass = NULL;
+       gint32 size = td->locals [local].size;
+       MonoType *type = td->locals [local].type;
+
        if (mt == MINT_TYPE_VT) {
-               klass = mono_class_from_mono_type_internal (type);
-               gint32 size = mono_class_value_size (klass, NULL);
-               interp_add_ins (td, MINT_LDLOC_VT);
-               td->last_ins->data [0] = local;
-               WRITE32_INS (td->last_ins, 1, &size);
+               MonoClass *klass = mono_class_from_mono_type_internal (type);
                push_type_vt (td, klass, size);
        } else {
-               g_assert (mt < MINT_TYPE_VT);
-               interp_add_ins (td, MINT_LDLOC_I1 + (mt - MINT_TYPE_I1));
-               td->last_ins->data [0] = local;
+               MonoClass *klass = NULL;
                if (mt == MINT_TYPE_O)
                        klass = mono_class_from_mono_type_internal (type);
                push_type (td, stack_type [mt], klass);
        }
+       interp_add_ins (td, get_mov_for_type (mt, TRUE));
+       interp_ins_set_sreg (td->last_ins, local);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       if (mt == MINT_TYPE_VT)
+               td->last_ins->data [0] = size;
 }
 
 static void 
 store_local (TransformData *td, int local)
 {
-       MonoType *type = td->locals [local].type;
        int mt = td->locals [local].mt;
        CHECK_STACK (td, 1);
 #if SIZEOF_VOID_P == 8
-       if (td->sp [-1].type == STACK_TYPE_I4 && stack_type [mt] == STACK_TYPE_I8) {
-               interp_add_ins (td, MINT_CONV_I8_I4);
-               td->sp [-1].type = STACK_TYPE_I8;
-       }
+       if (td->sp [-1].type == STACK_TYPE_I4 && stack_type [mt] == STACK_TYPE_I8)
+               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
 #endif
        if (!can_store(td->sp [-1].type, stack_type [mt])) {
                g_warning("%s.%s: Store local stack type mismatch %d %d", 
                        m_class_get_name (td->method->klass), td->method->name,
                        stack_type [mt], td->sp [-1].type);
        }
-       if (mt == MINT_TYPE_VT) {
-               MonoClass *klass = mono_class_from_mono_type_internal (type);
-               gint32 size = mono_class_value_size (klass, NULL);
-               interp_add_ins (td, MINT_STLOC_VT);
-               td->last_ins->data [0] = local;
-               WRITE32_INS (td->last_ins, 1, &size);
-       } else {
-               g_assert (mt < MINT_TYPE_VT);
-               interp_add_ins (td, MINT_STLOC_I1 + (mt - MINT_TYPE_I1));
-               td->last_ins->data [0] = local;
-       }
        --td->sp;
+       interp_add_ins (td, get_mov_for_type (mt, FALSE));
+       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+       interp_ins_set_dreg (td->last_ins, local);
+       if (mt == MINT_TYPE_VT)
+               td->last_ins->data [0] = td->locals [local].size;
 }
 
-#define SIMPLE_OP(td, op) \
-       do { \
-               interp_add_ins (td, op); \
-               ++td->ip; \
-       } while (0)
-
 static guint16
 get_data_item_index (TransformData *td, void *ptr)
 {
@@ -1003,17 +1164,22 @@ interp_generate_mae_throw (TransformData *td, MonoMethod *method, MonoMethod *ta
 
        /* Inject code throwing MethodAccessException */
        interp_add_ins (td, MINT_MONO_LDPTR);
-       td->last_ins->data [0] = get_data_item_index (td, method);
        push_simple_type (td, STACK_TYPE_I);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       td->last_ins->data [0] = get_data_item_index (td, method);
+       td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
 
        interp_add_ins (td, MINT_MONO_LDPTR);
-       td->last_ins->data [0] = get_data_item_index (td, target_method);
        push_simple_type (td, STACK_TYPE_I);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       td->last_ins->data [0] = get_data_item_index (td, target_method);
+       td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
 
+       td->sp -= 2;
        interp_add_ins (td, MINT_ICALL_PP_V);
+       interp_ins_set_dreg (td->last_ins, td->sp [0].local);
        td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func);
 
-       td->sp -= 2;
 }
 
 static void
@@ -1022,6 +1188,10 @@ interp_generate_bie_throw (TransformData *td)
        MonoJitICallInfo *info = &mono_get_jit_icall_info ()->mono_throw_bad_image;
 
        interp_add_ins (td, MINT_ICALL_V_V);
+       // Allocate a dummy local to serve as dreg for this instruction
+       push_simple_type (td, STACK_TYPE_I4);
+       td->sp--;
+       interp_ins_set_dreg (td->last_ins, td->sp [0].local);
        td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func);
 }
 
@@ -1031,6 +1201,10 @@ interp_generate_not_supported_throw (TransformData *td)
        MonoJitICallInfo *info = &mono_get_jit_icall_info ()->mono_throw_not_supported;
 
        interp_add_ins (td, MINT_ICALL_V_V);
+       // Allocate a dummy local to serve as dreg for this instruction
+       push_simple_type (td, STACK_TYPE_I4);
+       td->sp--;
+       interp_ins_set_dreg (td->last_ins, td->sp [0].local);
        td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func);
 }
 
@@ -1042,38 +1216,15 @@ interp_generate_ipe_throw_with_msg (TransformData *td, MonoError *error_msg)
        char *msg = mono_mem_manager_strdup (td->mem_manager, mono_error_get_message (error_msg));
 
        interp_add_ins (td, MINT_MONO_LDPTR);
-       td->last_ins->data [0] = get_data_item_index (td, msg);
        push_simple_type (td, STACK_TYPE_I);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+       td->last_ins->data [0] = get_data_item_index (td, msg);
 
+       td->sp -= 1;
        interp_add_ins (td, MINT_ICALL_P_V);
+       interp_ins_set_dreg (td->last_ins, td->sp [0].local);
        td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func);
-
-       td->sp -= 1;
-}
-
-/*
- * These are additional locals that can be allocated as we transform the code.
- * They are allocated past the method locals so they are accessed in the same
- * way, with an offset relative to the frame->locals.
- */
-static int
-create_interp_local_explicit (TransformData *td, MonoType *type, int size)
-{
-       if (td->locals_size == td->locals_capacity) {
-               td->locals_capacity *= 2;
-               if (td->locals_capacity == 0)
-                       td->locals_capacity = 2;
-               td->locals = (InterpLocal*) g_realloc (td->locals, td->locals_capacity * sizeof (InterpLocal));
-       }
-       td->locals [td->locals_size].type = type;
-       td->locals [td->locals_size].mt = mint_type (type);
-       td->locals [td->locals_size].flags = 0;
-       td->locals [td->locals_size].indirects = 0;
-       td->locals [td->locals_size].offset = -1;
-       td->locals [td->locals_size].size = size;
-       td->locals_size++;
-       return td->locals_size - 1;
-
 }
 
 static int
@@ -1088,32 +1239,162 @@ create_interp_local (TransformData *td, MonoType *type)
 }
 
 static int
-get_interp_local_offset (TransformData *td, int local)
+get_interp_local_offset (TransformData *td, int local, gboolean resolve_stack_locals)
 {
-       int size, offset;
+       // FIXME MINT_PROF_EXIT when void
+       if (local == -1)
+               return -1;
+
+       if ((td->locals [local].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK) && !resolve_stack_locals)
+               return -1;
 
        if (td->locals [local].offset != -1)
                return td->locals [local].offset;
 
-       offset = td->total_locals_size;
-       size = td->locals [local].size;
+       if (td->locals [local].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK) {
+               td->locals [local].offset = td->total_locals_size + td->locals [local].stack_offset;
+       } else {
+               int size, offset;
+
+               offset = td->total_locals_size;
+               size = td->locals [local].size;
 
-       td->locals [local].offset = offset;
+               td->locals [local].offset = offset;
+
+               td->total_locals_size = ALIGN_TO (offset + size, MINT_STACK_SLOT_SIZE);
+       }
 
-       td->total_locals_size = ALIGN_TO (offset + size, MINT_STACK_SLOT_SIZE);
        //g_assert (td->total_locals_size < G_MAXUINT16);
 
-       return offset;
+       return td->locals [local].offset;
+}
+
+/*
+ * ins_offset is the offset associated with this instruction.
+ * If ins is NULL, the data belongs to an instruction that was already
+ * emitted in the final code.
+ * data is the address where the arguments of the instruction are located.
+ */
+static char*
+dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, guint16 opcode)
+{
+       GString *str = g_string_new ("");
+       guint32 token;
+       int target;
+
+       switch (mono_interp_opargtype [opcode]) {
+       case MintOpNoArgs:
+               break;
+       case MintOpUShortInt:
+               g_string_append_printf (str, " %u", *(guint16*)data);
+               break;
+       case MintOpTwoShorts:
+               g_string_append_printf (str, " %u,%u", *(guint16*)data, *(guint16 *)(data + 1));
+               break;
+       case MintOpShortAndInt:
+               g_string_append_printf (str, " %u,%u", *(guint16*)data, (guint32)READ32(data + 1));
+               break;
+       case MintOpShortInt:
+               g_string_append_printf (str, " %d", *(gint16*)data);
+               break;
+       case MintOpClassToken:
+       case MintOpMethodToken:
+       case MintOpFieldToken:
+               token = * (guint16 *) data;
+               g_string_append_printf (str, " %u", token);
+               break;
+       case MintOpInt:
+               g_string_append_printf (str, " %d", (gint32)READ32 (data));
+               break;
+       case MintOpLongInt:
+               g_string_append_printf (str, " %" PRId64, (gint64)READ64 (data));
+               break;
+       case MintOpFloat: {
+               gint32 tmp = READ32 (data);
+               g_string_append_printf (str, " %g", * (float *)&tmp);
+               break;
+       }
+       case MintOpDouble: {
+               gint64 tmp = READ64 (data);
+               g_string_append_printf (str, " %g", * (double *)&tmp);
+               break;
+       }
+       case MintOpShortBranch:
+               if (ins) {
+                       /* the target IL is already embedded in the instruction */
+                       g_string_append_printf (str, " BB%d", ins->info.target_bb->index);
+               } else {
+                       target = ins_offset + *(gint16*)data;
+                       g_string_append_printf (str, " IR_%04x", target);
+               }
+               break;
+       case MintOpBranch:
+               if (ins) {
+                       g_string_append_printf (str, " BB%d", ins->info.target_bb->index);
+               } else {
+                       target = ins_offset + (gint32)READ32 (data);
+                       g_string_append_printf (str, " IR_%04x", target);
+               }
+               break;
+       case MintOpSwitch: {
+               int sval = (gint32)READ32 (data);
+               int i;
+               g_string_append_printf (str, "(");
+               gint32 p = 2;
+               for (i = 0; i < sval; ++i) {
+                       if (i > 0)
+                               g_string_append_printf (str, ", ");
+                       if (ins) {
+                               g_string_append_printf (str, "BB%d", ins->info.target_bb_table [i]->index);
+                       } else {
+                               g_string_append_printf (str, "IR_%04x", (gint32)READ32 (data + p));
+                       }
+                       p += 2;
+               }
+               g_string_append_printf (str, ")");
+               break;
+       }
+       default:
+               g_string_append_printf (str, "unknown arg type\n");
+       }
+
+       return g_string_free (str, FALSE);
+}
+
+static void
+dump_interp_compacted_ins (const guint16 *ip, const guint16 *start)
+{
+       int opcode = *ip;
+       int ins_offset = ip - start;
+
+       g_print ("IR_%04x: %-14s", ins_offset, mono_interp_opname (opcode));
+       ip++;
+
+        if (mono_interp_op_dregs [opcode] == MINT_CALL_ARGS)
+                g_print (" [call_args %d <-", *ip++);
+        else if (mono_interp_op_dregs [opcode] > 0)
+                g_print (" [%d <-", *ip++);
+        else
+                g_print (" [nil <-");
+
+        if (mono_interp_op_sregs [opcode] > 0) {
+                for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                        g_print (" %d", *ip++);
+                g_print ("],");
+        } else {
+                g_print (" nil],");
+        }
+       char *ins = dump_interp_ins_data (NULL, ins_offset, ip, opcode);
+       g_print ("%s\n", ins);
+       g_free (ins);
 }
 
 static void
-dump_mint_code (const guint16 *start, const guint16* end)
+dump_interp_code (const guint16 *start, const guint16* end)
 {
        const guint16 *p = start;
        while (p < end) {
-               char *ins = mono_interp_dis_mintop ((gint32)(p - start), TRUE, p + 1, *p);
-               g_print ("%s\n", ins);
-               g_free (ins);
+               dump_interp_compacted_ins (p, start);
                p = mono_interp_dis_mintop_len (p);
        }
 }
@@ -1121,9 +1402,32 @@ dump_mint_code (const guint16 *start, const guint16* end)
 static void
 dump_interp_inst_no_newline (InterpInst *ins)
 {
-       char *descr = mono_interp_dis_mintop (ins->il_offset, FALSE, &ins->data [0], ins->opcode);
-       g_print ("%s", descr);
-       g_free (descr);
+       int opcode = ins->opcode;
+       g_print ("IL_%04x: %-14s", ins->il_offset, mono_interp_opname (opcode));
+
+        if (mono_interp_op_dregs [opcode] == MINT_CALL_ARGS)
+                g_print (" [call_args %d <-", ins->dreg);
+        else if (mono_interp_op_dregs [opcode] > 0)
+                g_print (" [%d <-", ins->dreg);
+        else
+                g_print (" [nil <-");
+
+        if (mono_interp_op_sregs [opcode] > 0) {
+                for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                        g_print (" %d", ins->sregs [i]);
+                g_print ("],");
+        } else {
+                g_print (" nil],");
+        }
+
+       if (opcode == MINT_LDLOCA_S) {
+               // LDLOCA has special semantics, it has data in sregs [0], but it doesn't have any sregs
+               g_print (" %d", ins->sregs [0]);
+       } else {
+               char *descr = dump_interp_ins_data (ins, ins->il_offset, &ins->data [0], ins->opcode);
+               g_print ("%s", descr);
+               g_free (descr);
+       }
 }
 
 static void
@@ -1157,7 +1461,7 @@ mono_interp_print_code (InterpMethod *imethod)
        g_free (name);
 
        start = (guint8*) jinfo->code_start;
-       dump_mint_code ((const guint16*)start, (const guint16*)(start + jinfo->code_size));
+       dump_interp_code ((const guint16*)start, (const guint16*)(start + jinfo->code_size));
 }
 
 /* For debug use */
@@ -1198,10 +1502,10 @@ emit_store_value_as_local (TransformData *td, MonoType *src)
        store_local (td, local);
 
        interp_add_ins (td, MINT_LDLOCA_S);
-       td->last_ins->data [0] = local;
-       td->locals [local].indirects++;
-
        push_simple_type (td, STACK_TYPE_MP);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       interp_ins_set_sreg (td->last_ins, local);
+       td->locals [local].indirects++;
 }
 
 static gboolean
@@ -1241,7 +1545,7 @@ interp_get_const_from_ldc_i4 (InterpInst *ins)
 
 /* If ins is not null, it will replace it with the ldc */
 static InterpInst*
-interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct)
+interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct, int dreg)
 {
        int opcode;
        switch (ct) {
@@ -1276,10 +1580,11 @@ interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct)
        } else {
                ins->opcode = opcode;
        }
+       interp_ins_set_dreg (ins, dreg);
 
-       if (new_size == 2)
+       if (new_size == 3)
                ins->data [0] = (gint8)ct;
-       else if (new_size == 3)
+       else if (new_size == 4)
                WRITE32_INS (ins, 0, &ct);
 
        return ins;
@@ -1289,6 +1594,7 @@ static InterpInst*
 interp_inst_replace_with_i8_const (TransformData *td, InterpInst *ins, gint64 ct)
 {
        int size = mono_interp_oplen [ins->opcode];
+       int dreg = ins->dreg;
 
        if (size < 5) {
                ins = interp_insert_ins (td, ins, MINT_LDC_I8);
@@ -1297,6 +1603,7 @@ interp_inst_replace_with_i8_const (TransformData *td, InterpInst *ins, gint64 ct
                ins->opcode = MINT_LDC_I8;
        }
        WRITE64_INS (ins, 0, &ct);
+       ins->dreg = dreg;
 
        return ins;
 }
@@ -1324,19 +1631,24 @@ static void
 interp_emit_ldobj (TransformData *td, MonoClass *klass)
 {
        int mt = mint_type (m_class_get_byval_arg (klass));
-       int size;
+       gint32 size;
        td->sp--;
 
        if (mt == MINT_TYPE_VT) {
                interp_add_ins (td, MINT_LDOBJ_VT);
                size = mono_class_value_size (klass, NULL);
-               WRITE32_INS (td->last_ins, 0, &size);
+               g_assert (size < G_MAXUINT16);
+               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                push_type_vt (td, klass, size);
        } else {
                int opcode = interp_get_ldind_for_mt (mt);
                interp_add_ins (td, opcode);
+               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                push_type (td, stack_type [mt], klass);
        }
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       if (mt == MINT_TYPE_VT)
+               td->last_ins->data [0] = size;
 }
 
 static void
@@ -1378,6 +1690,7 @@ interp_emit_stobj (TransformData *td, MonoClass *klass)
                interp_add_ins (td, opcode);
        }
        td->sp -= 2;
+       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
 }
 
 static void
@@ -1386,27 +1699,39 @@ interp_emit_ldelema (TransformData *td, MonoClass *array_class, MonoClass *check
        MonoClass *element_class = m_class_get_element_class (array_class);
        int rank = m_class_get_rank (array_class);
        int size = mono_class_array_element_size (element_class);
+       gboolean call_args = FALSE;
 
        gboolean bounded = m_class_get_byval_arg (array_class) ? m_class_get_byval_arg (array_class)->type == MONO_TYPE_ARRAY : FALSE;
 
+       td->sp -= rank + 1;
        // We only need type checks when writing to array of references
        if (!check_class || m_class_is_valuetype (element_class)) {
                if (rank == 1 && !bounded) {
                        interp_add_ins (td, MINT_LDELEMA1);
-                       WRITE32_INS (td->last_ins, 0, &size);
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       g_assert (size < G_MAXUINT16);
+                       td->last_ins->data [0] = size;
                } else {
                        interp_add_ins (td, MINT_LDELEMA);
+                       for (int i = 0; i < rank + 1; i++)
+                               td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
                        td->last_ins->data [0] = rank;
-                       WRITE32_INS (td->last_ins, 1, &size);
+                       g_assert (size < G_MAXUINT16);
+                       td->last_ins->data [1] = size;
+                       call_args = TRUE;
                }
        } else {
                interp_add_ins (td, MINT_LDELEMA_TC);
-               td->last_ins->data [0] = rank;
-               td->last_ins->data [1] = get_data_item_index (td, check_class);
+               for (int i = 0; i < rank + 1; i++)
+                       td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+               td->last_ins->data [0] = get_data_item_index (td, check_class);
+               call_args = TRUE;
        }
 
-       td->sp -= rank;
-       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
+       push_simple_type (td, STACK_TYPE_MP);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+       if (call_args)
+               td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
 }
 
 static gboolean
@@ -1425,10 +1750,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
                if (arg_size > SIZEOF_VOID_P) { // 8 -> 4
                        switch (type_index) {
                        case 0: case 1:
-                               interp_add_ins (td, MINT_CONV_I4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_I8);
                                break;
                        case 2:
-                               interp_add_ins (td, MINT_CONV_R4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_R8);
                                break;
                        }
                }
@@ -1436,13 +1761,13 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
                if (arg_size < SIZEOF_VOID_P) { // 4 -> 8
                        switch (type_index) {
                        case 0:
-                               interp_add_ins (td, MINT_CONV_I8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                                break;
                        case 1:
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_U4);
                                break;
                        case 2:
-                               interp_add_ins (td, MINT_CONV_R8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                                break;
                        }
                }
@@ -1463,8 +1788,8 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
 #endif
                        break;
                }
-
                td->sp -= 2;
+               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                td->ip += 5;
                return TRUE;
        } else if (!strcmp ("op_Implicit", tm ) || !strcmp ("op_Explicit", tm)) {
@@ -1509,10 +1834,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
                if (src_size > dst_size) { // 8 -> 4
                        switch (type_index) {
                        case 0: case 1:
-                               interp_add_ins (td, MINT_CONV_I4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_I8);
                                break;
                        case 2:
-                               interp_add_ins (td, MINT_CONV_R4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_R8);
                                break;
                        }
                }
@@ -1520,13 +1845,13 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
                if (src_size < dst_size) { // 4 -> 8
                        switch (type_index) {
                        case 0:
-                               interp_add_ins (td, MINT_CONV_I8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                                break;
                        case 1:
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_U4);
                                break;
                        case 2:
-                               interp_add_ins (td, MINT_CONV_R8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                                break;
                        }
                }
@@ -1541,7 +1866,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
 #else
                interp_add_ins (td, MINT_ADD1_I4);
 #endif
-               SET_TYPE (td->sp - 1, stack_type [mt], magic_class);
+               td->sp--;
+               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+               push_type (td, stack_type [mt], magic_class);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                td->ip += 5;
                return TRUE;
        } else if (!strcmp ("op_Decrement", tm)) {
@@ -1551,7 +1879,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
 #else
                interp_add_ins (td, MINT_SUB1_I4);
 #endif
-               SET_TYPE (td->sp - 1, stack_type [mt], magic_class);
+               td->sp--;
+               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+               push_type (td, stack_type [mt], magic_class);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                td->ip += 5;
                return TRUE;
        } else if (!strcmp ("CompareTo", tm) || !strcmp ("Equals", tm)) {
@@ -1581,7 +1912,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
        for (i = 0; i < sizeof (int_unnop) / sizeof  (MagicIntrinsic); ++i) {
                if (!strcmp (int_unnop [i].op_name, tm)) {
                        interp_add_ins (td, int_unnop [i].insn [type_index]);
-                       SET_TYPE (td->sp - 1, stack_type [mt], magic_class);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_type (td, stack_type [mt], magic_class);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -1590,8 +1924,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
        for (i = 0; i < sizeof (int_binop) / sizeof  (MagicIntrinsic); ++i) {
                if (!strcmp (int_binop [i].op_name, tm)) {
                        interp_add_ins (td, int_binop [i].insn [type_index]);
-                       td->sp -= 1;
-                       SET_TYPE (td->sp - 1, stack_type [mt], magic_class);
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_type (td, stack_type [mt], magic_class);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -1601,8 +1937,10 @@ interp_handle_magic_type_intrinsics (TransformData *td, MonoMethod *target_metho
                if (!strcmp (int_cmpop [i].op_name, tm)) {
                        MonoClass *k = mono_defaults.boolean_class;
                        interp_add_ins (td, int_cmpop [i].insn [type_index]);
-                       td->sp -= 1;
-                       SET_TYPE (td->sp - 1, stack_type [mint_type (m_class_get_byval_arg (k))], k);
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_type (td, stack_type [mint_type (m_class_get_byval_arg (k))], k);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -1725,7 +2063,10 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        interp_add_ins (td, MINT_INTRINS_U32_TO_DECSTR);
                        td->last_ins->data [0] = get_data_item_index (td, (char*)mono_vtable_get_static_field_data (vtable) + field->offset);
                        td->last_ins->data [1] = get_data_item_index (td, mono_class_vtable_checked (td->rtm->domain, mono_defaults.string_class, error));
-                       SET_TYPE (td->sp - 1, STACK_TYPE_O, mono_defaults.string_class);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_type (td, STACK_TYPE_O, mono_defaults.string_class);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -1826,8 +2167,10 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                                td->last_ins->data [1] = offset_length;
                                td->last_ins->data [2] = offset_pointer;
 
-                               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
-                               td->sp -= 1;
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 5;
                                return TRUE;
                        }
@@ -1837,7 +2180,10 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        int offset_length = length_field->offset - sizeof (MonoObject);
                        interp_add_ins (td, MINT_LDLEN_SPAN);
                        td->last_ins->data [0] = offset_length;
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -1850,7 +2196,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                else if (!strcmp (tm, "ByteOffset"))
                        *op = MINT_INTRINS_UNSAFE_BYTE_OFFSET;
                else if (!strcmp (tm, "As") || !strcmp (tm, "AsRef"))
-                       *op = MINT_NOP;
+                       *op = MINT_MOV_P;
                else if (!strcmp (tm, "AsPointer")) {
                        /* NOP */
                        SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
@@ -1864,8 +2210,10 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
 
                        MonoClass *k = mono_defaults.boolean_class;
                        interp_add_ins (td, MINT_CLT_UN_P);
-                       td->sp -= 1;
-                       SET_TYPE (td->sp - 1, stack_type [mint_type (m_class_get_byval_arg (k))], k);
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_type (td, stack_type [mint_type (m_class_get_byval_arg (k))], k);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                } else if (!strcmp (tm, "SizeOf")) {
@@ -1879,12 +2227,13 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        interp_add_ins (td, MINT_LDC_I4);
                        WRITE32_INS (td->last_ins, 0, &esize);
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                } else if (!strcmp (tm, "AreSame")) {
                        *op = MINT_CEQ_P;
                } else if (!strcmp (tm, "SkipInit")) {
-                       *op = MINT_POP;
+                       *op = MINT_NOP;
                } else if (!strcmp (tm, "InitBlockUnaligned")) {
                        *op = MINT_INITBLK;
                }
@@ -1897,13 +2246,17 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        interp_add_ins (td, MINT_LDC_I4);
                        WRITE32_INS (td->last_ins, 0, &offset);
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                } else if (!strcmp (tm, "GetRawData")) {
                        interp_add_ins (td, MINT_LDFLDA_UNSAFE);
                        td->last_ins->data [0] = (gint16) MONO_ABI_SIZEOF (MonoObject);
 
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_simple_type (td, STACK_TYPE_MP);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 
                        td->ip += 5;
                        return TRUE;
@@ -1972,9 +2325,9 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        MonoType *base_type = mono_type_get_underlying_type (m_class_get_byval_arg (td->sp [-2].klass));
                        base_klass = mono_class_from_mono_type_internal (base_type);
 
-                       // Remove the boxing of valuetypes
-                       interp_clear_ins (td->last_ins->prev->prev);
-                       interp_clear_ins (td->last_ins);
+                       // Remove the boxing of valuetypes, by replacing them with moves
+                       td->last_ins->prev->prev->opcode = get_mov_for_type (mint_type (base_type), FALSE);
+                       td->last_ins->opcode = get_mov_for_type (mint_type (base_type), FALSE);
 
                        intrinsify = TRUE;
                } else if (td->last_ins && td->last_ins->opcode == MINT_BOX &&
@@ -1988,16 +2341,19 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                        int mt = mint_type (m_class_get_byval_arg (base_klass));
 
                        // Remove boxing and load the value of this
-                       interp_clear_ins (td->last_ins);
-                       interp_insert_ins (td, td->last_ins->prev->prev, interp_get_ldind_for_mt (mt));
-
+                       td->last_ins->opcode = get_mov_for_type (mt, FALSE);
+                       InterpInst *ins = interp_insert_ins (td, td->last_ins->prev->prev, interp_get_ldind_for_mt (mt));
+                       interp_ins_set_sreg (ins, td->sp [-2].local);
+                       interp_ins_set_dreg (ins, td->sp [-2].local);
                        intrinsify = TRUE;
                }
                if (intrinsify) {
                        interp_add_ins (td, MINT_INTRINS_ENUM_HASFLAG);
                        td->last_ins->data [0] = get_data_item_index (td, base_klass);
                        td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                }
@@ -2041,8 +2397,10 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                                interp_add_ins (td, is_i8 ? MINT_CGT_UN_I8 : MINT_CGT_UN_I4);
                        else
                                interp_add_ins (td, is_i8 ? MINT_CGT_I8 : MINT_CGT_I4);
-                       td->sp --;
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        // (a < b)
                        load_local (td, locala);
                        load_local (td, localb);
@@ -2050,11 +2408,16 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                                interp_add_ins (td, is_i8 ? MINT_CLT_UN_I8 : MINT_CLT_UN_I4);
                        else
                                interp_add_ins (td, is_i8 ? MINT_CLT_I8 : MINT_CLT_I4);
-                       td->sp --;
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        // (a > b) - (a < b)
                        interp_add_ins (td, MINT_SUB_I4);
-                       td->sp --;
+                       td->sp -= 2;
+                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                       push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 5;
                        return TRUE;
                } else {
@@ -2394,20 +2757,21 @@ static void
 interp_constrained_box (TransformData *td, MonoDomain *domain, MonoClass *constrained_class, MonoMethodSignature *csignature, MonoError *error)
 {
        int mt = mint_type (m_class_get_byval_arg (constrained_class));
-       int ptr_offset = td->sp [-1 - csignature->param_count].offset; 
+       StackInfo *sp = td->sp - 1 - csignature->param_count;
        if (mono_class_is_nullable (constrained_class)) {
                g_assert (mt == MINT_TYPE_VT);
                interp_add_ins (td, MINT_BOX_NULLABLE_PTR);
                td->last_ins->data [0] = get_data_item_index (td, constrained_class);
-               td->last_ins->data [1] = ptr_offset;
        } else {
                MonoVTable *vtable = mono_class_vtable_checked (domain, constrained_class, error);
                return_if_nok (error);
 
                interp_add_ins (td, MINT_BOX_PTR);
                td->last_ins->data [0] = get_data_item_index (td, vtable);
-               td->last_ins->data [1] = ptr_offset;
        }
+       interp_ins_set_sreg (td->last_ins, sp->local);
+       set_simple_type_and_local (td, sp, STACK_TYPE_O);
+       interp_ins_set_dreg (td->last_ins, sp->local);
 }
 
 static MonoMethod*
@@ -2432,6 +2796,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
        int op = -1;
        int native = 0;
        int need_null_check = is_virtual;
+       int fp_sreg = -1, first_sreg = -1, dreg = -1;
        gboolean is_delegate_invoke = FALSE;
 
        guint32 token = read32 (td->ip + 1);
@@ -2523,9 +2888,12 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 
                // Follow the rules for constrained calls from ECMA spec
                if (!m_class_is_valuetype (constrained_class)) {
+                       StackInfo *sp = td->sp - 1 - csignature->param_count;
                        /* managed pointer on the stack, we need to deref that puppy */
                        interp_add_ins (td, MINT_LDIND_I);
-                       td->last_ins->data [0] = td->sp [-1 - csignature->param_count].offset;
+                       interp_ins_set_sreg (td->last_ins, sp->local);
+                       set_simple_type_and_local (td, sp, STACK_TYPE_I);
+                       interp_ins_set_dreg (td->last_ins, sp->local);
                } else if (target_method->klass != constrained_class) {
                        /*
                         * The type parameter is instantiated as a valuetype,
@@ -2595,8 +2963,11 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
                csignature = mono_method_get_signature_checked (target_method, image, token, generic_context, error);
 
        if (need_null_check) {
-               interp_add_ins (td, MINT_CKNULL_N);
-               td->last_ins->data [0] = td->sp [-1 - csignature->param_count].offset;
+               StackInfo *sp = td->sp - 1 - csignature->param_count;
+               interp_add_ins (td, MINT_CKNULL);
+               interp_ins_set_sreg (td->last_ins, sp->local);
+               set_simple_type_and_local (td, sp, sp->type);
+               interp_ins_set_dreg (td->last_ins, sp->local);
        }
 
        g_assert (csignature->call_convention != MONO_CALL_FASTCALL);
@@ -2622,37 +2993,78 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
        }
 
        /* Pop the function pointer */
-       if (calli)
+       if (calli) {
                --td->sp;
+               fp_sreg = td->sp [0].local;
+               td->locals [fp_sreg].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+       }
 
        guint32 tos_offset = get_tos_offset (td);
        td->sp -= csignature->param_count + !!csignature->hasthis;
        guint32 params_stack_size = tos_offset - get_tos_offset (td);
 
+       if (op == -1 || mono_interp_op_dregs [op] == MINT_CALL_ARGS) {
+               // We must not optimize out these locals, storing to them is part of the interp call convention
+               // unless we already intrinsified this call
+               for (int i = 0; i < (csignature->param_count + !!csignature->hasthis); i++)
+                       td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+       }
+
+       // The first argument's stack slot gets overwritten with the return local below, so save its local for later use
+       if (csignature->param_count || csignature->hasthis)
+               first_sreg = td->sp [0].local;
+
        /* need to handle typedbyref ... */
        if (csignature->ret->type != MONO_TYPE_VOID) {
                int mt = mint_type(csignature->ret);
                MonoClass *klass = mono_class_from_mono_type_internal (csignature->ret);
+
                if (mt == MINT_TYPE_VT) {
                        if (csignature->pinvoke && method->wrapper_type != MONO_WRAPPER_NONE)
                                res_size = mono_class_native_size (klass, NULL);
                        else
                                res_size = mono_class_value_size (klass, NULL);
+                       push_type_vt (td, klass, res_size);
                        res_size = ALIGN_TO (res_size, MINT_VT_ALIGNMENT);
                        if (mono_class_has_failure (klass)) {
                                mono_error_set_for_class_failure (error, klass);
                                return FALSE;
                        }
-                       push_type_vt (td, klass, res_size);
                } else {
                        push_type (td, stack_type[mt], klass);
                        res_size = MINT_STACK_SLOT_SIZE;
                }
+               dreg = td->sp [-1].local;
+               if (op == -1 || mono_interp_op_dregs [op] == MINT_CALL_ARGS) {
+                       // This dreg needs to be at the same offset as the call args
+                       td->locals [dreg].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+               }
+       } else {
+               // Create a new dummy local to serve as the dreg of the call
+               // This dreg is only used to resolve the call args offset
+               push_simple_type (td, STACK_TYPE_I4);
+               td->sp--;
+               dreg = td->sp [0].local;
        }
 
        if (op >= 0) {
                interp_add_ins (td, op);
 
+               int has_dreg = mono_interp_op_dregs [op];
+               int num_sregs = mono_interp_op_sregs [op];
+               if (has_dreg)
+                       interp_ins_set_dreg (td->last_ins, dreg);
+               if (num_sregs > 0) {
+                       if (num_sregs == 1)
+                               interp_ins_set_sreg (td->last_ins, first_sreg);
+                       else if (num_sregs == 2)
+                               interp_ins_set_sregs2 (td->last_ins, first_sreg, td->sp [!has_dreg].local);
+                       else if (num_sregs == 3)
+                               interp_ins_set_sregs3 (td->last_ins, first_sreg, td->sp [!has_dreg].local, td->sp [!has_dreg + 1].local);
+                       else
+                               g_error ("Unsupported opcode");
+               }
+               
                if (op == MINT_LDLEN) {
 #ifdef MONO_BIG_ARRAYS
                        SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I8);
@@ -2663,35 +3075,42 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 
 #ifndef ENABLE_NETCORE
                if (op == MINT_CALLRUN) {
+                       interp_ins_set_dreg (td->last_ins, dreg);
                        td->last_ins->data [0] = get_data_item_index (td, target_method);
                        td->last_ins->data [1] = get_data_item_index (td, mono_method_signature_internal (target_method));
                }
 #endif
        } else if (!calli && !is_delegate_invoke && !is_virtual && mono_interp_jit_call_supported (target_method, csignature)) {
                interp_add_ins (td, MINT_JIT_CALL);
+               interp_ins_set_dreg (td->last_ins, dreg);
                td->last_ins->data [0] = get_data_item_index (td, (void *)mono_interp_get_imethod (domain, target_method, error));
                mono_error_assert_ok (error);
-               td->last_ins->data [1] = params_stack_size;
        } else {
                if (is_delegate_invoke) {
                        interp_add_ins (td, MINT_CALL_DELEGATE);
-                       td->last_ins->data [0] = get_data_item_index (td, (void *)csignature);
-                       td->last_ins->data [1] = params_stack_size;
+                       interp_ins_set_dreg (td->last_ins, dreg);
+                       td->last_ins->data [0] = params_stack_size;
+                       td->last_ins->data [1] = get_data_item_index (td, (void *)csignature);
                } else if (calli) {
 #ifndef MONO_ARCH_HAS_NO_PROPER_MONOCTX
                        /* Try using fast icall path for simple signatures */
                        if (native && !method->dynamic)
                                op = interp_icall_op_for_sig (csignature);
 #endif
+                       // FIXME calli receives both the args offset and sometimes another arg for the function pointer,
+                       // therefore some args are in the param area, while the fp is not. We should differentiate for
+                       // this, probably once we have an explicit param area where we copy arguments.
                        if (op != -1) {
                                interp_add_ins (td, MINT_CALLI_NAT_FAST);
+                               interp_ins_set_dreg (td->last_ins, dreg);
                                td->last_ins->data [0] = get_data_item_index (td, (void *)csignature);
                                td->last_ins->data [1] = op;
                                td->last_ins->data [2] = save_last_error;
                        } else if (native && method->dynamic && csignature->pinvoke) {
                                interp_add_ins (td, MINT_CALLI_NAT_DYNAMIC);
+                               interp_ins_set_dreg (td->last_ins, dreg);
+                               interp_ins_set_sreg (td->last_ins, fp_sreg);
                                td->last_ins->data [0] = get_data_item_index (td, (void *)csignature);
-                               td->last_ins->data [1] = params_stack_size;
                        } else if (native) {
                                interp_add_ins (td, MINT_CALLI_NAT);
 #ifdef TARGET_X86
@@ -2714,17 +3133,18 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
                                        }
                                }
 
+                               interp_ins_set_dreg (td->last_ins, dreg);
+                               interp_ins_set_sreg (td->last_ins, fp_sreg);
                                td->last_ins->data [0] = get_data_item_index (td, csignature);
                                td->last_ins->data [1] = get_data_item_index (td, imethod);
-                               td->last_ins->data [2] = params_stack_size;
-                               td->last_ins->data [3] = res_size;
-                               td->last_ins->data [4] = save_last_error;
+                               td->last_ins->data [2] = save_last_error;
                                /* Cache slot */
-                               td->last_ins->data [5] = get_data_item_index_nonshared (td, NULL);
+                               td->last_ins->data [3] = get_data_item_index_nonshared (td, NULL);
                        } else {
                                interp_add_ins (td, MINT_CALLI);
+                               interp_ins_set_dreg (td->last_ins, dreg);
+                               interp_ins_set_sreg (td->last_ins, fp_sreg);
                                td->last_ins->data [0] = get_data_item_index (td, (void *)csignature);
-                               td->last_ins->data [1] = params_stack_size;
                        }
                } else {
                        InterpMethod *imethod = mono_interp_get_imethod (domain, target_method, error);
@@ -2732,20 +3152,21 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 
                        if (csignature->call_convention == MONO_CALL_VARARG) {
                                interp_add_ins (td, MINT_CALL_VARARG);
-                               td->last_ins->data [2] = get_data_item_index (td, (void *)csignature);
+                               td->last_ins->data [1] = get_data_item_index (td, (void *)csignature);
+                               td->last_ins->data [2] = params_stack_size;
                        } else if (is_virtual && !mono_class_is_marshalbyref (target_method->klass)) {
                                interp_add_ins (td, MINT_CALLVIRT_FAST);
                                if (mono_class_is_interface (target_method->klass))
-                                       td->last_ins->data [2] = -2 * MONO_IMT_SIZE + mono_method_get_imt_slot (target_method);
+                                       td->last_ins->data [1] = -2 * MONO_IMT_SIZE + mono_method_get_imt_slot (target_method);
                                else
-                                       td->last_ins->data [2] = mono_method_get_vtable_slot (target_method);
+                                       td->last_ins->data [1] = mono_method_get_vtable_slot (target_method);
                        } else if (is_virtual) {
                                interp_add_ins (td, MINT_CALLVIRT);
                        } else {
                                interp_add_ins (td, MINT_CALL);
                        }
+                       interp_ins_set_dreg (td->last_ins, dreg);
                        td->last_ins->data [0] = get_data_item_index (td, (void *)imethod);
-                       td->last_ins->data [1] = params_stack_size;
 
 #ifdef ENABLE_EXPERIMENT_TIERED
                        if (MINT_IS_PATCHABLE_CALL (td->last_ins->opcode)) {
@@ -3168,15 +3589,17 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
                else
                        type = mono_method_signature_internal (td->method)->params [i - sig->hasthis];
                int mt = mint_type (type);
+               td->locals [i].type = type;
                td->locals [i].offset = offset;
                td->locals [i].flags = 0;
                td->locals [i].indirects = 0;
-               td->locals [i].type = type;
                td->locals [i].mt = mt;
                if (mt == MINT_TYPE_VT && (!sig->hasthis || i != 0)) {
                        size = mono_type_size (type, &align);
+                       td->locals [i].size = size;
                        offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
                } else {
+                       td->locals [i].size = MINT_STACK_SLOT_SIZE; // not really
                        offset += MINT_STACK_SLOT_SIZE;
                }
        }
@@ -3194,12 +3617,17 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
                offset += align - 1;
                offset &= ~(align - 1);
                imethod->local_offsets [i] = offset;
+               td->locals [index].type = header->locals [i];
                td->locals [index].offset = offset;
                td->locals [index].flags = 0;
                td->locals [index].indirects = 0;
-               td->locals [index].type = header->locals [i];
                td->locals [index].mt = mint_type (header->locals [i]);
-               offset += size;
+               if (td->locals [index].mt == MINT_TYPE_VT)
+                       td->locals [index].size = size;
+               else
+                       td->locals [index].size = MINT_STACK_SLOT_SIZE; // not really
+               // Every local's size is rounded up to a multiple of MINT_STACK_SLOT_SIZE, so IL locals have the same behavior as execution stack locals
+               offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
        }
        offset = ALIGN_TO (offset, MINT_VT_ALIGNMENT);
        td->il_locals_size = offset - td->il_locals_offset;
@@ -3270,7 +3698,15 @@ interp_handle_isinst (TransformData *td, MonoClass *klass, gboolean isinst_instr
        } else {
                interp_add_ins (td, isinst_instr ? MINT_ISINST : MINT_CASTCLASS);
        }
+       td->sp--;
+       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+       if (isinst_instr)
+               push_type (td, td->sp [0].type, td->sp [0].klass);
+       else
+               push_type (td, STACK_TYPE_O, klass);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
        td->last_ins->data [0] = get_data_item_index (td, klass);
+
        td->ip += 5;
 }
 
@@ -3282,6 +3718,7 @@ interp_emit_ldsflda (TransformData *td, MonoClassField *field, MonoError *error)
        MonoVTable *vtable = mono_class_vtable_checked (domain, field->parent, error);
        return_if_nok (error);
 
+       push_simple_type (td, STACK_TYPE_MP);
        if (mono_class_field_is_special_static (field)) {
                guint32 offset;
 
@@ -3292,9 +3729,11 @@ interp_emit_ldsflda (TransformData *td, MonoClassField *field, MonoError *error)
                g_assert (offset);
 
                interp_add_ins (td, MINT_LDSSFLDA);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                WRITE32_INS(td->last_ins, 0, &offset);
        } else {
                interp_add_ins (td, MINT_LDSFLDA);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                td->last_ins->data [0] = get_data_item_index (td, vtable);
                td->last_ins->data [1] = get_data_item_index (td, (char*)mono_vtable_get_static_field_data (vtable) + field->offset);
        }
@@ -3303,6 +3742,10 @@ interp_emit_ldsflda (TransformData *td, MonoClassField *field, MonoError *error)
 static gboolean
 interp_emit_load_const (TransformData *td, gpointer field_addr, int mt)
 {
+       if (mt == MINT_TYPE_VT)
+               return FALSE;
+
+       push_simple_type (td, stack_type [mt]);
        if ((mt >= MINT_TYPE_I1 && mt <= MINT_TYPE_I4)) {
                gint32 val;
                switch (mt) {
@@ -3321,20 +3764,25 @@ interp_emit_load_const (TransformData *td, gpointer field_addr, int mt)
                default:
                        val = *(gint32*)field_addr;
                }
-               interp_get_ldc_i4_from_const (td, NULL, val);
+               interp_get_ldc_i4_from_const (td, NULL, val, td->sp [-1].local);
        } else if (mt == MINT_TYPE_I8) {
                gint64 val = *(gint64*)field_addr;
                interp_add_ins (td, MINT_LDC_I8);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                WRITE64_INS (td->last_ins, 0, &val);
        } else if (mt == MINT_TYPE_R4) {
                float val = *(float*)field_addr;
                interp_add_ins (td, MINT_LDC_R4);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                WRITE32_INS (td->last_ins, 0, &val);
        } else if (mt == MINT_TYPE_R8) {
                double val = *(double*)field_addr;
                interp_add_ins (td, MINT_LDC_R8);
+               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                WRITE64_INS (td->last_ins, 0, &val);
        } else {
+               // Revert stack
+               td->sp--;
                return FALSE;
        }
        return TRUE;
@@ -3355,7 +3803,11 @@ emit_convert (TransformData *td, int stype, MonoType *ftype)
        case MONO_TYPE_I8: {
                switch (stype) {
                case STACK_TYPE_I4:
+                       td->sp--;
                        interp_add_ins (td, MINT_CONV_I8_I4);
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_simple_type (td, STACK_TYPE_I8);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        break;
                default:
                        break;
@@ -3387,41 +3839,76 @@ interp_emit_sfld_access (TransformData *td, MonoClassField *field, MonoClass *fi
                // Offset is SpecialStaticOffset
                if ((offset & 0x80000000) == 0 && mt != MINT_TYPE_VT) {
                        // This field is thread static
-                       interp_add_ins (td, (is_load ? MINT_LDTSFLD_I1 : MINT_STTSFLD_I1) + mt);
-                       WRITE32_INS(td->last_ins, 0, &offset);
+                       if (is_load) {
+                               interp_add_ins (td, MINT_LDTSFLD_I1 + mt);
+                               WRITE32_INS(td->last_ins, 0, &offset);
+                               push_type (td, stack_type [mt], field_class);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       } else {
+                               interp_add_ins (td, MINT_STTSFLD_I1 + mt);
+                               WRITE32_INS(td->last_ins, 0, &offset);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       }
                } else {
                        if (mt == MINT_TYPE_VT) {
-                               interp_add_ins (td, is_load ? MINT_LDSSFLD_VT : MINT_STSSFLD_VT);
-                               WRITE32_INS(td->last_ins, 0, &offset);
-
                                int size = mono_class_value_size (field_class, NULL);
-                               WRITE32_INS(td->last_ins, 2, &size);
+                               g_assert (size < G_MAXUINT16);
+                               if (is_load) {
+                                       interp_add_ins (td, MINT_LDSSFLD_VT);
+                                       push_type_vt (td, field_class, size);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               } else {
+                                       interp_add_ins (td, MINT_STSSFLD_VT);
+                                       td->sp--;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               }
+                               WRITE32_INS(td->last_ins, 0, &offset);
+                               td->last_ins->data [2] = size;
                        } else {
-                               interp_add_ins (td, is_load ? MINT_LDSSFLD : MINT_STSSFLD);
+                               if (is_load) {
+                                       interp_add_ins (td, MINT_LDSSFLD);
+                                       push_type (td, stack_type [mt], field_class);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               } else {
+                                       interp_add_ins (td, MINT_STSSFLD);
+                                       td->sp--;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               }
                                td->last_ins->data [0] = get_data_item_index (td, field);
                                WRITE32_INS(td->last_ins, 1, &offset);
                        }
                }
        } else {
                gpointer field_addr = (char*)mono_vtable_get_static_field_data (vtable) + field->offset;
+               int size = 0;
+               if (mt == MINT_TYPE_VT)
+                       size = mono_class_value_size (field_class, NULL);
                if (is_load) {
                        MonoType *ftype = mono_field_get_type_internal (field);
                        if (ftype->attrs & FIELD_ATTRIBUTE_INIT_ONLY && vtable->initialized) {
                                if (interp_emit_load_const (td, field_addr, mt))
                                        return;
                        }
-                       interp_add_ins (td, (mt == MINT_TYPE_VT) ? MINT_LDSFLD_VT : (MINT_LDSFLD_I1 + mt - MINT_TYPE_I1));
+                       if (mt == MINT_TYPE_VT) {
+                               interp_add_ins (td, MINT_LDSFLD_VT);
+                               push_type_vt (td, field_class, size);
+                       } else {
+                               interp_add_ins (td, MINT_LDSFLD_I1 + mt - MINT_TYPE_I1);
+                               push_type (td, stack_type [mt], field_class);
+                       }
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                } else {
                        interp_add_ins (td, (mt == MINT_TYPE_VT) ? MINT_STSFLD_VT : (MINT_STSFLD_I1 + mt - MINT_TYPE_I1));
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                }
 
                td->last_ins->data [0] = get_data_item_index (td, vtable);
                td->last_ins->data [1] = get_data_item_index (td, (char*)field_addr);
+               if (mt == MINT_TYPE_VT)
+                       td->last_ins->data [2] = size;
 
-               if (mt == MINT_TYPE_VT) {
-                       int size = mono_class_value_size (field_class, NULL);
-                       WRITE32_INS(td->last_ins, 2, &size);
-               }
        }
 }
 
@@ -3460,6 +3947,8 @@ initialize_clause_bblocks (TransformData *td)
                        bb->stack_state [0].type = STACK_TYPE_O;
                        bb->stack_state [0].klass = NULL; /*FIX*/
                        bb->stack_state [0].size = MINT_STACK_SLOT_SIZE;
+                       bb->stack_state [0].offset = 0;
+                       bb->stack_state [0].local = create_interp_stack_local (td, STACK_TYPE_O, NULL, MINT_STACK_SLOT_SIZE, 0);
                }
 
                if (c->flags == MONO_EXCEPTION_CLAUSE_FILTER) {
@@ -3471,6 +3960,8 @@ initialize_clause_bblocks (TransformData *td)
                        bb->stack_state [0].type = STACK_TYPE_O;
                        bb->stack_state [0].klass = NULL; /*FIX*/
                        bb->stack_state [0].size = MINT_STACK_SLOT_SIZE;
+                       bb->stack_state [0].offset = 0;
+                       bb->stack_state [0].local = create_interp_stack_local (td, STACK_TYPE_O, NULL, MINT_STACK_SLOT_SIZE, 0);
                } else if (c->flags == MONO_EXCEPTION_CLAUSE_NONE) {
                        /*
                         * JIT doesn't emit sdb seq intr point at the start of catch clause, probably
@@ -3483,6 +3974,62 @@ initialize_clause_bblocks (TransformData *td)
 
 }
 
+static void
+handle_ldind (TransformData *td, int op, int type, gboolean *volatile_)
+{ // Emit indirect-load instruction `op`: one stack entry is consumed as the source, one entry of stack type `type` is produced as the destination.
+       CHECK_STACK (td, 1); // presumably verifies one operand is on the stack (macro defined elsewhere -- confirm)
+       interp_add_ins (td, op);
+       td->sp--; // pop the operand; its slot is read just below as td->sp [0]
+       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+       push_simple_type (td, type);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local); // result goes to the local of the entry just pushed
+
+       if (*volatile_) {
+               interp_emit_memory_barrier (td, MONO_MEMORY_BARRIER_ACQ); // acquire barrier is emitted after the load instruction
+               *volatile_ = FALSE; // the caller's volatile flag is consumed (reset) here
+       }
+       ++td->ip; // advance the IL pointer by one byte
+}
+
+static void
+handle_stind (TransformData *td, int op, gboolean *volatile_)
+{ // Emit indirect-store instruction `op`: two stack entries are consumed as sources, nothing is pushed.
+       CHECK_STACK (td, 2); // presumably verifies two operands are on the stack (macro defined elsewhere -- confirm)
+       if (*volatile_) {
+               interp_emit_memory_barrier (td, MONO_MEMORY_BARRIER_REL); // release barrier is emitted before the store instruction
+               *volatile_ = FALSE; // the caller's volatile flag is consumed (reset) here
+       }
+       interp_add_ins (td, op);
+       td->sp -= 2; // pop both operands; they remain readable as td->sp [0] and td->sp [1]
+       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local); // sources in stack order, deeper entry first
+
+       ++td->ip; // advance the IL pointer by one byte
+}
+
+static void
+handle_ldelem (TransformData *td, int op, int type)
+{ // Emit array-element load instruction `op`: two stack entries are consumed as sources, one entry of stack type `type` is produced.
+       CHECK_STACK (td, 2); // presumably verifies two operands are on the stack (macro defined elsewhere -- confirm)
+       ENSURE_I4 (td, 1); // NOTE(review): looks like this forces the top entry (the index) to I4 -- confirm against the macro
+       interp_add_ins (td, op);
+       td->sp -= 2; // pop both operands; they remain readable as td->sp [0] and td->sp [1]
+       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local); // sources in stack order, deeper entry first
+       push_simple_type (td, type);
+       interp_ins_set_dreg (td->last_ins, td->sp [-1].local); // result goes to the local of the entry just pushed
+       ++td->ip; // advance the IL pointer by one byte
+}
+
+static void
+handle_stelem (TransformData *td, int op)
+{ // Emit array-element store instruction `op`: three stack entries are consumed as sources, nothing is pushed.
+       CHECK_STACK (td, 3); // presumably verifies three operands are on the stack (macro defined elsewhere -- confirm)
+       ENSURE_I4 (td, 2); // NOTE(review): looks like this forces the second-from-top entry (the index) to I4 -- confirm against the macro
+       interp_add_ins (td, op);
+       td->sp -= 3; // pop all three operands; they remain readable as td->sp [0..2]
+       interp_ins_set_sregs3 (td->last_ins, td->sp [0].local, td->sp [1].local, td->sp [2].local); // sources in stack order, deepest entry first
+       ++td->ip; // advance the IL pointer by one byte
+}
+
 static gboolean
 generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, MonoGenericContext *generic_context, MonoError *error)
 {
@@ -3581,8 +4128,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                last_seq_point->flags |= INTERP_INST_FLAG_SEQ_POINT_METHOD_ENTRY;
        }
 
-       if (mono_debugger_method_has_breakpoint (method))
+       if (mono_debugger_method_has_breakpoint (method)) {
                interp_add_ins (td, MINT_BREAKPOINT);
+       }
 
        if (!inlining) {
                if (td->verbose_level) {
@@ -3601,11 +4149,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        // is localloc'ed so we have compile time static offsets for all locals/stack.
                        arglist_local = create_interp_local (td, m_class_get_byval_arg (mono_defaults.int_class));
                        interp_add_ins (td, MINT_INIT_ARGLIST);
-                       td->last_ins->data [0] = arglist_local;
+                       interp_ins_set_dreg (td->last_ins, arglist_local);
                        // This is the offset where the variable args are on stack. After this instruction
                        // which copies them to localloc'ed memory, this space will be overwritten by normal
                        // locals
-                       td->last_ins->data [1] = td->il_locals_offset;
+                       td->last_ins->data [0] = td->il_locals_offset;
                        td->has_localloc = TRUE;
                }
 
@@ -3693,6 +4241,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                 * instruction is an unconditional branch (BR, LEAVE, ENDFINALLY)
                                 */
                                interp_link_bblocks (td, td->cbb, new_bb);
+                               fixup_newbb_stack_locals (td, new_bb);
                        }
                        td->cbb->next_bb = new_bb;
                        td->cbb = new_bb;
@@ -3730,7 +4279,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                        if (td->verbose_level > 1)
                                g_print ("SKIPPING DEAD OP at %x\n", in_offset);
-
+                       link_bblocks = FALSE;
                        td->ip += op_size;
                        continue;
                }
@@ -3765,7 +4314,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        ++td->ip;
                        break;
                case CEE_BREAK:
-                       SIMPLE_OP(td, MINT_BREAK);
+                       interp_add_ins (td, MINT_BREAK);
+                       ++td->ip;
                        break;
                case CEE_LDARG_0:
                case CEE_LDARG_1:
@@ -3818,15 +4368,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                        if (!inlining) {
                                interp_add_ins (td, MINT_LDLOCA_S);
-                               td->last_ins->data [0] = n;
+                               interp_ins_set_sreg (td->last_ins, n);
                                td->locals [n].indirects++;
                        } else {
                                int loc_n = arg_locals [n];
                                interp_add_ins (td, MINT_LDLOCA_S);
-                               td->last_ins->data [0] = loc_n;
+                               interp_ins_set_sreg (td->last_ins, loc_n);
                                td->locals [loc_n].indirects++;
                        }
                        push_simple_type (td, STACK_TYPE_MP);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 2;
                        break;
                }
@@ -3855,9 +4406,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                loc_n += num_args;
                        else
                                loc_n = local_locals [loc_n];
-                       td->last_ins->data [0] = loc_n;
+                       interp_ins_set_sreg (td->last_ins, loc_n);
                        td->locals [loc_n].indirects++;
                        push_simple_type (td, STACK_TYPE_MP);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->ip += 2;
                        break;
                }
@@ -3871,31 +4423,47 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        break;
                }
                case CEE_LDNULL: 
-                       SIMPLE_OP(td, MINT_LDNULL);
+                       interp_add_ins (td, MINT_LDNULL);
                        push_type (td, STACK_TYPE_O, NULL);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       ++td->ip;
                        break;
                case CEE_LDC_I4_M1:
-                       SIMPLE_OP(td, MINT_LDC_I4_M1);
+                       interp_add_ins (td, MINT_LDC_I4_M1);
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       ++td->ip;
                        break;
                case CEE_LDC_I4_0:
                        if (in_offset + 2 < td->code_size && interp_ip_in_cbb (td, in_offset + 1) && td->ip [1] == 0xfe && td->ip [2] == CEE_CEQ &&
                                td->sp > td->stack && td->sp [-1].type == STACK_TYPE_I4) {
-                               SIMPLE_OP(td, MINT_CEQ0_I4);
-                               td->ip += 2;
+                               interp_add_ins (td, MINT_CEQ0_I4);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->ip += 3;
                        } else {
-                               SIMPLE_OP(td, MINT_LDC_I4_0);
+                               interp_add_ins (td, MINT_LDC_I4_0);
                                push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               ++td->ip;
                        }
                        break;
                case CEE_LDC_I4_1:
                        if (in_offset + 1 < td->code_size && interp_ip_in_cbb (td, in_offset + 1) &&
                                (td->ip [1] == CEE_ADD || td->ip [1] == CEE_SUB) && td->sp [-1].type == STACK_TYPE_I4) {
                                interp_add_ins (td, td->ip [1] == CEE_ADD ? MINT_ADD1_I4 : MINT_SUB1_I4);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 2;
                        } else {
-                               SIMPLE_OP(td, MINT_LDC_I4_1);
+                               interp_add_ins (td, MINT_LDC_I4_1);
                                push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               ++td->ip;
                        }
                        break;
                case CEE_LDC_I4_2:
@@ -3905,28 +4473,33 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                case CEE_LDC_I4_6:
                case CEE_LDC_I4_7:
                case CEE_LDC_I4_8:
-                       SIMPLE_OP(td, (*td->ip - CEE_LDC_I4_0) + MINT_LDC_I4_0);
+                       interp_add_ins (td, (*td->ip - CEE_LDC_I4_0) + MINT_LDC_I4_0);
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       ++td->ip;
                        break;
                case CEE_LDC_I4_S: 
                        interp_add_ins (td, MINT_LDC_I4_S);
                        td->last_ins->data [0] = ((gint8 *) td->ip) [1];
-                       td->ip += 2;
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->ip += 2;
                        break;
                case CEE_LDC_I4:
                        i32 = read32 (td->ip + 1);
                        interp_add_ins (td, MINT_LDC_I4);
                        WRITE32_INS (td->last_ins, 0, &i32);
-                       td->ip += 5;
                        push_simple_type (td, STACK_TYPE_I4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->ip += 5;
                        break;
                case CEE_LDC_I8: {
                        gint64 val = read64 (td->ip + 1);
                        interp_add_ins (td, MINT_LDC_I8);
                        WRITE64_INS (td->last_ins, 0, &val);
-                       td->ip += 9;
                        push_simple_type (td, STACK_TYPE_I8);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->ip += 9;
                        break;
                }
                case CEE_LDC_R4: {
@@ -3934,8 +4507,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        readr4 (td->ip + 1, &val);
                        interp_add_ins (td, MINT_LDC_R4);
                        WRITE32_INS (td->last_ins, 0, &val);
-                       td->ip += 5;
                        push_simple_type (td, STACK_TYPE_R4);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->ip += 5;
                        break;
                }
                case CEE_LDC_R8: {
@@ -3943,37 +4517,38 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        readr8 (td->ip + 1, &val);
                        interp_add_ins (td, MINT_LDC_R8);
                        WRITE64_INS (td->last_ins, 0, &val);
-                       td->ip += 9;
                        push_simple_type (td, STACK_TYPE_R8);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->ip += 9;
                        break;
                }
                case CEE_DUP: {
                        int type = td->sp [-1].type;
                        MonoClass *klass = td->sp [-1].klass;
-                       if (td->sp [-1].type == STACK_TYPE_VT) {
+                       int mt = td->locals [td->sp [-1].local].mt;
+                       if (mt == MINT_TYPE_VT) {
                                gint32 size = mono_class_value_size (klass, NULL);
-                               interp_add_ins (td, MINT_DUP_VT);
-                               WRITE32_INS (td->last_ins, 0, &size);
-                               td->ip ++;
+                               g_assert (size < G_MAXUINT16);
+
+                               interp_add_ins (td, MINT_MOV_VT);
+                               interp_ins_set_sreg (td->last_ins, td->sp [-1].local);
                                push_type_vt (td, klass, size);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = size;
                        } else  {
-                               SIMPLE_OP(td, MINT_DUP);
+                               interp_add_ins (td, get_mov_for_type (mt, FALSE));
+                               interp_ins_set_sreg (td->last_ins, td->sp [-1].local);
                                push_type (td, type, klass);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        }
+                       td->ip++;
                        break;
                }
                case CEE_POP:
                        CHECK_STACK(td, 1);
-                       if (td->sp [-1].type == STACK_TYPE_VT) {
-                               int size = mono_class_value_size (td->sp [-1].klass, NULL);
-                               size = ALIGN_TO (size, MINT_VT_ALIGNMENT);
-                               interp_add_ins (td, MINT_POP_VT);
-                               WRITE32_INS (td->last_ins, 0, &size);
-                               td->ip++;
-                       } else {
-                               SIMPLE_OP(td, MINT_POP);
-                       }
+                       interp_add_ins (td, MINT_NOP);
                        --td->sp;
+                       ++td->ip;
                        break;
                case CEE_JMP: {
                        MonoMethod *m;
@@ -4027,6 +4602,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        /* Return from inlined method, return value is on top of stack */
                        if (inlining) {
                                td->ip++;
+                               fixup_newbb_stack_locals (td, exit_bb);
                                interp_add_ins (td, MINT_BR_S);
                                td->last_ins->info.target_bb = exit_bb;
                                init_bb_stack_state (td, exit_bb);
@@ -4061,24 +4637,32 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                exit_profiling |= PROFILING_FLAG;
                        if (exit_profiling) {
                                /* This does the return as well */
+                               interp_add_ins (td, MINT_PROF_EXIT);
                                if (ult->type == MONO_TYPE_VOID) {
-                                       interp_add_ins (td, MINT_PROF_EXIT_VOID);
                                        vt_size = -1;
+                                       interp_ins_set_sreg (td->last_ins, -1);
                                } else {
-                                       interp_add_ins (td, MINT_PROF_EXIT);
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                }
+                                       
                                td->last_ins->data [0] = exit_profiling;
                                WRITE32_INS (td->last_ins, 1, &vt_size);
-                               ++td->ip;
                        } else {
-                               if (vt_size == 0)
-                                       SIMPLE_OP(td, ult->type == MONO_TYPE_VOID ? MINT_RET_VOID : MINT_RET);
-                               else {
+                               if (vt_size == 0) {
+                                       if (ult->type == MONO_TYPE_VOID) {
+                                               interp_add_ins (td, MINT_RET_VOID);
+                                       } else {
+                                               interp_add_ins (td, MINT_RET);
+                                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                                       }
+                               } else {
                                        interp_add_ins (td, MINT_RET_VT);
-                                       WRITE32_INS (td->last_ins, 0, &vt_size);
-                                       ++td->ip;
+                                       g_assert (vt_size < G_MAXUINT16);
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                                       td->last_ins->data [0] = vt_size;
                                }
                        }
+                       ++td->ip;
                        break;
                }
                case CEE_BR: {
@@ -4205,6 +4789,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        td->ip += 4;
                        next_ip = td->ip + n * 4;
                        --td->sp;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                        InterpBasicBlock **target_bb_table = (InterpBasicBlock**)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock*) * n);
                        for (i = 0; i < n; i++) {
                                offset = read32 (td->ip);
@@ -4227,118 +4812,61 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        break;
                }
                case CEE_LDIND_I1:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_I1_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_I1_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_U1:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_U1_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_U1_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_I2:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_I2_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_I2_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_U2:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_U2_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_U2_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_I4:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_I4_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_I4_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_U4:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_U4_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_U4_CHECK, STACK_TYPE_I4, &volatile_);
                        break;
                case CEE_LDIND_I8:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_I8_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_I8_CHECK, STACK_TYPE_I8, &volatile_);
                        break;
                case CEE_LDIND_I:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_REF_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_REF_CHECK, STACK_TYPE_I, &volatile_);
                        break;
                case CEE_LDIND_R4:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_R4_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R4);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_R4_CHECK, STACK_TYPE_R4, &volatile_);
                        break;
                case CEE_LDIND_R8:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_R8_CHECK);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R8);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
+                       handle_ldind (td, MINT_LDIND_R8_CHECK, STACK_TYPE_R8, &volatile_);
                        break;
                case CEE_LDIND_REF:
-                       CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDIND_REF_CHECK);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_O);
+                       handle_ldind (td, MINT_LDIND_REF_CHECK, STACK_TYPE_O, &volatile_);
                        break;
                case CEE_STIND_REF:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_REF);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_REF, &volatile_);
                        break;
                case CEE_STIND_I1:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_I1);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_I1, &volatile_);
                        break;
                case CEE_STIND_I2:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_I2);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_I2, &volatile_);
                        break;
                case CEE_STIND_I4:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_I4);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_I4, &volatile_);
                        break;
                case CEE_STIND_I:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_I);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_I, &volatile_);
                        break;
                case CEE_STIND_I8:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_I8);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_I8, &volatile_);
                        break;
                case CEE_STIND_R4:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_R4);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_R4, &volatile_);
                        break;
                case CEE_STIND_R8:
-                       CHECK_STACK (td, 2);
-                       BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
-                       SIMPLE_OP (td, MINT_STIND_R8);
-                       td->sp -= 2;
+                       handle_stind (td, MINT_STIND_R8, &volatile_);
                        break;
                case CEE_ADD:
                        binary_arith_op(td, MINT_ADD_I4);
@@ -4404,203 +4932,200 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_U1_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U1_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_U1_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U1_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_U1_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U1_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_U1_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U1_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_I1:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_I1_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I1_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_I1_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I1_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_I1_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I1_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_I1_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I1_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_U2:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_U2_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U2_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_U2_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U2_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_U2_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U2_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_U2_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U2_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_I2:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_I2_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I2_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_I2_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I2_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_I2_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I2_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_I2_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I2_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_U:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R8:
 #if SIZEOF_VOID_P == 4
-                               interp_add_ins (td, MINT_CONV_U4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_U4_R8);
 #else
-                               interp_add_ins (td, MINT_CONV_U8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_U8_R8);
 #endif
                                break;
                        case STACK_TYPE_I4:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I8_U4);
 #endif
                                break;
                        case STACK_TYPE_I8:
 #if SIZEOF_VOID_P == 4
-                               interp_add_ins (td, MINT_CONV_U4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_U4_I8);
 #endif
                                break;
                        case STACK_TYPE_MP:
                        case STACK_TYPE_O:
+                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
                        break;
                case CEE_CONV_I: 
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R8:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I8_R8);
 #else
-                               interp_add_ins (td, MINT_CONV_I4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I4_R8);
 #endif
                                break;
                        case STACK_TYPE_I4:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I8_I4);
 #endif
                                break;
                        case STACK_TYPE_O:
-                               break;
                        case STACK_TYPE_MP:
+                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
                                break;
                        case STACK_TYPE_I8:
 #if SIZEOF_VOID_P == 4
-                               interp_add_ins (td, MINT_CONV_I4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I4_I8);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
                        break;
                case CEE_CONV_U4:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_U4_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U4_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_U4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U4_R8);
                                break;
                        case STACK_TYPE_I4:
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_U4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U4_I8);
                                break;
                        case STACK_TYPE_MP:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_U4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_U4_I8);
+#else
+                               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_I4:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_I4_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_I4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_R8);
                                break;
                        case STACK_TYPE_I4:
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_I4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_I8);
                                break;
                        case STACK_TYPE_MP:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_I8);
+#else
+                               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_I8:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_I8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_I8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_R8);
                                break;
                        case STACK_TYPE_I4: {
                                if (interp_ins_is_ldc (td->last_ins) && td->last_ins == td->cbb->last_ins) {
@@ -4608,9 +5133,12 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_clear_ins (td->last_ins);
 
                                        interp_add_ins (td, MINT_LDC_I8);
+                                       td->sp--;
+                                       push_simple_type (td, STACK_TYPE_I8);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                        WRITE64_INS (td->last_ins, 0, &ct);
                                } else {
-                                       interp_add_ins (td, MINT_CONV_I8_I4);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                                }
                                break;
                        }
@@ -4619,25 +5147,26 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        case STACK_TYPE_MP:
 #if SIZEOF_VOID_P == 4
                                interp_add_ins (td, MINT_CONV_I8_I4);
+#else
+                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
                        break;
                case CEE_CONV_R4:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_R4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_R8);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_R4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_I8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_R4_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_I4);
                                break;
                        case STACK_TYPE_R4:
                                /* no-op */
@@ -4646,19 +5175,18 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R4);
                        break;
                case CEE_CONV_R8:
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_R8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_R8_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_I8);
                                break;
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_R8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                                break;
                        case STACK_TYPE_R8:
                                break;
@@ -4666,7 +5194,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R8);
                        break;
                case CEE_CONV_U8:
                        CHECK_STACK (td, 1);
@@ -4677,29 +5204,33 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_clear_ins (td->last_ins);
 
                                        interp_add_ins (td, MINT_LDC_I8);
+                                       td->sp--;
+                                       push_simple_type (td, STACK_TYPE_I8);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                        WRITE64_INS (td->last_ins, 0, &ct);
                                } else {
-                                       interp_add_ins (td, MINT_CONV_I8_U4);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_U4);
                                }
                                break;
                        case STACK_TYPE_I8:
                                break;
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_U8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_U8_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_U8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_U8_R8);
                                break;
                        case STACK_TYPE_MP:
 #if SIZEOF_VOID_P == 4
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_U4);
+#else
+                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
                        break;
                case CEE_CPOBJ: {
                        CHECK_STACK (td, 2);
@@ -4710,14 +5241,22 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                        if (m_class_is_valuetype (klass)) {
                                int mt = mint_type (m_class_get_byval_arg (klass));
+                               td->sp -= 2;
                                interp_add_ins (td, (mt == MINT_TYPE_VT) ? MINT_CPOBJ_VT : MINT_CPOBJ);
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                td->last_ins->data [0] = get_data_item_index(td, klass);
                        } else {
+                               td->sp--;
                                interp_add_ins (td, MINT_LDIND_REF);
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+
+                               td->sp -= 2;
                                interp_add_ins (td, MINT_STIND_REF);
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                        }
                        td->ip += 5;
-                       td->sp -= 2;
                        break;
                }
                case CEE_LDOBJ: {
@@ -4740,20 +5279,22 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                }
                case CEE_LDSTR: {
                        token = mono_metadata_token_index (read32 (td->ip + 1));
-                       td->ip += 5;
+                       push_type (td, STACK_TYPE_O, mono_defaults.string_class);
                        if (method->wrapper_type == MONO_WRAPPER_NONE) {
                                MonoString *s = mono_ldstr_checked (domain, image, token, error);
                                goto_if_nok (error, exit);
                                /* GC won't scan code stream, but reference is held by metadata
                                 * machinery so we are good here */
                                interp_add_ins (td, MINT_LDSTR);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->last_ins->data [0] = get_data_item_index (td, s);
                        } else {
                                /* defer allocation to execution-time */
                                interp_add_ins (td, MINT_LDSTR_TOKEN);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->last_ins->data [0] = get_data_item_index (td, GUINT_TO_POINTER (token));
                        }
-                       push_type (td, STACK_TYPE_O, mono_defaults.string_class);
+                       td->ip += 5;
                        break;
                }
                case CEE_NEWOBJ: {
@@ -4784,53 +5325,57 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                        int ret_mt = mint_type (m_class_get_byval_arg (klass));
                        if (mono_class_is_magic_int (klass) || mono_class_is_magic_float (klass)) {
-                               td->sp -= csignature->param_count;
+                               g_assert (csignature->param_count == 1);
 #if SIZEOF_VOID_P == 8
-                               if (mono_class_is_magic_int (klass) && td->sp [0].type == STACK_TYPE_I4)
-                                       interp_add_ins (td, MINT_CONV_I8_I4);
-                               else if (mono_class_is_magic_float (klass) && td->sp [0].type == STACK_TYPE_R4)
-                                       interp_add_ins (td, MINT_CONV_R8_R4);
+                               if (mono_class_is_magic_int (klass) && td->sp [-1].type == STACK_TYPE_I4)
+                                       interp_add_conv (td, td->sp - 1, NULL, stack_type [ret_mt], MINT_CONV_I8_I4);
+                               else if (mono_class_is_magic_float (klass) && td->sp [-1].type == STACK_TYPE_R4)
+                                       interp_add_conv (td, td->sp - 1, NULL, stack_type [ret_mt], MINT_CONV_R8_R4);
 #endif
-                               interp_add_ins (td, MINT_NEWOBJ_MAGIC);
-                               td->last_ins->data [0] = get_data_item_index (td, mono_interp_get_imethod (domain, m, error));
-                               goto_if_nok (error, exit);
-
-                               push_type (td, stack_type [ret_mt], klass);
                        } else if (klass == mono_defaults.int_class && csignature->param_count == 1)  {
-                               td->sp--;
 #if SIZEOF_VOID_P == 8
-                               if (td->sp [0].type == STACK_TYPE_I4)
-                                       interp_add_ins (td, MINT_CONV_I8_I4);
+                               if (td->sp [-1].type == STACK_TYPE_I4)
+                                       interp_add_conv (td, td->sp - 1, NULL, stack_type [ret_mt], MINT_CONV_I8_I4);
 #else
-                               if (td->sp [0].type == STACK_TYPE_I8)
-                                       interp_add_ins (td, MINT_CONV_OVF_I4_I8);
+                               if (td->sp [-1].type == STACK_TYPE_I8)
+                                       interp_add_conv (td, td->sp - 1, NULL, stack_type [ret_mt], MINT_CONV_OVF_I4_I8);
 #endif
-
-                               push_type (td, stack_type [ret_mt], klass);
                        } else if (m_class_get_parent (klass) == mono_defaults.array_class) {
+                               td->sp -= csignature->param_count;
+                               for (int i = 0; i < csignature->param_count; i++)
+                                       td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+
                                interp_add_ins (td, MINT_NEWOBJ_ARRAY);
                                td->last_ins->data [0] = get_data_item_index (td, m->klass);
                                td->last_ins->data [1] = csignature->param_count;
-                               td->sp -= csignature->param_count;
                                push_type (td, stack_type [ret_mt], klass);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
                        } else if (klass == mono_defaults.string_class) {
                                guint32 tos_offset = get_tos_offset (td);
                                td->sp -= csignature->param_count;
                                guint32 params_stack_size = tos_offset - get_tos_offset (td);
 
+                               for (int i = 0; i < csignature->param_count; i++)
+                                       td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+
                                interp_add_ins (td, MINT_NEWOBJ_STRING);
                                td->last_ins->data [0] = get_data_item_index (td, mono_interp_get_imethod (domain, m, error));
                                td->last_ins->data [1] = params_stack_size;
                                push_type (td, stack_type [ret_mt], klass);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
                        } else if (m_class_get_image (klass) == mono_defaults.corlib &&
                                        !strcmp (m_class_get_name (m->klass), "ByReference`1") &&
                                        !strcmp (m->name, ".ctor")) {
                                /* public ByReference(ref T value) */
                                g_assert (csignature->hasthis && csignature->param_count == 1);
-                               /* We already have the vt on top of the stack */
-                               interp_add_ins (td, MINT_NOP);
                                td->sp--;
+                               /* We already have the vt on top of the stack. Just do a dummy mov that should be optimized out */
+                               interp_add_ins (td, MINT_MOV_P);
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                push_type_vt (td, klass, mono_class_value_size (klass, NULL));
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        } else if (m_class_get_image (klass) == mono_defaults.corlib &&
                                        (!strcmp (m_class_get_name (m->klass), "Span`1") ||
                                        !strcmp (m_class_get_name (m->klass), "ReadOnlySpan`1")) &&
@@ -4840,16 +5385,24 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                /* ctor frequently used with ReadOnlySpan over static arrays */
                                interp_add_ins (td, MINT_INTRINS_SPAN_CTOR);
                                td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                push_type_vt (td, klass, mono_class_value_size (klass, NULL));
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        } else {
                                guint32 tos_offset = get_tos_offset (td);
                                td->sp -= csignature->param_count;
                                guint32 params_stack_size = tos_offset - get_tos_offset (td);
 
                                // Move params types in temporary buffer
+                               // FIXME stop leaking sp_params
                                StackInfo *sp_params = (StackInfo*) g_malloc (sizeof (StackInfo) * csignature->param_count);
                                memcpy (sp_params, td->sp, sizeof (StackInfo) * csignature->param_count);
 
+                               // We must not optimize out these locals, storing to them is part of the interp call convention
+                               // FIXME this affects inlining efficiency. We need to first remove the param moving by NEWOBJ
+                               for (int i = 0; i < csignature->param_count; i++)
+                                       td->locals [sp_params [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+
                                // Push the return value and `this` argument to the ctor
                                gboolean is_vt = m_class_is_valuetype (klass);
                                int vtsize = 0;
@@ -4864,6 +5417,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        push_type (td, stack_type [ret_mt], klass);
                                        push_type (td, stack_type [ret_mt], klass);
                                }
+                               int dreg = td->sp [-2].local;
+                               td->locals [dreg].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
 
                                // Push back the params to top of stack
                                push_types (td, sp_params, csignature->param_count);
@@ -4875,28 +5430,34 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                                        if (is_vt) {
                                                newobj_fast = interp_add_ins (td, MINT_NEWOBJ_VT_FAST);
-                                               newobj_fast->data [2] = vtsize;
-                                               // FIXME Remove this once we dump stack based design. It is here only to inform cprop
-                                               // how many args this instruction receives in inlined case and it is a bad pattern.
-                                               newobj_fast->data [3] = csignature->param_count;
+                                               interp_ins_set_dreg (newobj_fast, dreg);
+                                               newobj_fast->data [1] = ALIGN_TO (vtsize, MINT_STACK_SLOT_SIZE);
                                        } else {
                                                MonoVTable *vtable = mono_class_vtable_checked (domain, klass, error);
                                                goto_if_nok (error, exit);
                                                newobj_fast = interp_add_ins (td, MINT_NEWOBJ_FAST);
-                                               newobj_fast->data [2] = get_data_item_index (td, vtable);
-                                               newobj_fast->data [3] = csignature->param_count;
+                                               interp_ins_set_dreg (newobj_fast, dreg);
+                                               newobj_fast->data [1] = get_data_item_index (td, vtable);
                                        }
-                                       newobj_fast->data [1] = params_stack_size;
+                                       // FIXME remove these once we have our own local offset allocator, even for execution stack locals
+                                       newobj_fast->data [2] = params_stack_size;
+                                       newobj_fast->data [3] = csignature->param_count;
 
-                                       // We don't support inlining ctors of MINT_TYPE_VT which also receive a MINT_TYPE_VT
-                                       // as an argument. The reason is that we would need to push this on the vtstack before
-                                       // the argument, which is very awkward for uncommon scenario.
                                        if ((mono_interp_opt & INTERP_OPT_INLINE) && interp_method_check_inlining (td, m, csignature)) {
                                                MonoMethodHeader *mheader = interp_method_get_header (m, error);
                                                goto_if_nok (error, exit);
 
+                                               // Add local mapping information for cprop to use, in case we inline
+                                               int param_count = csignature->param_count;
+                                               int *newobj_reg_map = (int*)mono_mempool_alloc (td->mempool, sizeof (int) * param_count * 2);
+                                               for (int i = 0; i < param_count; i++) {
+                                                       newobj_reg_map [2 * i] = sp_params [i].local;
+                                                       newobj_reg_map [2 * i + 1] = td->sp [-param_count + i].local;
+                                               }
+
                                                if (interp_inline_method (td, m, mheader, error)) {
                                                        newobj_fast->data [0] = INLINED_METHOD_FLAG;
+                                                       newobj_fast->info.newobj_reg_map = newobj_reg_map;
                                                        break;
                                                }
                                        }
@@ -4907,6 +5468,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                } else {
                                        interp_add_ins (td, MINT_NEWOBJ);
                                        g_assert (!m_class_is_valuetype (klass));
+                                       interp_ins_set_dreg (td->last_ins, dreg);
                                        td->last_ins->data [0] = get_data_item_index (td, mono_interp_get_imethod (domain, m, error));
                                        td->last_ins->data [1] = params_stack_size;
                                }
@@ -4924,8 +5486,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        klass = mini_get_class (method, token, generic_context);
                        CHECK_TYPELOAD (klass);
                        interp_handle_isinst (td, klass, isinst_instr);
-                       if (!isinst_instr)
-                               td->sp [-1].klass = klass;
                        break;
                }
                case CEE_CONV_R_UN:
@@ -4933,15 +5493,14 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        case STACK_TYPE_R8:
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_R_UN_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R_UN_I8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_R_UN_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R_UN_I4);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R8);
                        ++td->ip;
                        break;
                case CEE_UNBOX:
@@ -4971,14 +5530,19 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                 */
                                int local = create_interp_local (td, m_class_get_byval_arg (klass));
                                store_local (td, local);
+
                                interp_add_ins (td, MINT_LDLOCA_S);
-                               td->last_ins->data [0] = local;
-                               td->locals [local].indirects++;
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               interp_ins_set_sreg (td->last_ins, local);
+                               td->locals [local].indirects++;
                        } else {
                                interp_add_ins (td, MINT_UNBOX);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->last_ins->data [0] = get_data_item_index (td, klass);
-                               SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
                                td->ip += 5;
                        }
                        break;
@@ -5001,14 +5565,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        push_type_vt (td, klass, mono_class_value_size (klass, NULL));
                                else
                                        push_type (td, stack_type [mt], klass);
+                               // FIXME do this somewhere else, maybe in super instruction pass, where we would check
+                               // instruction patterns
+                               // Restore the local that is on top of the stack
+                               td->sp [-1].local = td->last_ins->sregs [0];
                                td->ip += 5;
                                break;
                        }
 
                        if (mini_type_is_reference (m_class_get_byval_arg (klass))) {
-                               int mt = mint_type (m_class_get_byval_arg (klass));
                                interp_handle_isinst (td, klass, FALSE);
-                               SET_TYPE (td->sp - 1, stack_type [mt], klass);
                        } else if (mono_class_is_nullable (klass)) {
                                MonoMethod *target_method;
                                if (m_class_is_enumtype (mono_class_get_nullable_param_internal (klass)))
@@ -5021,6 +5587,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        goto exit;
                        } else {
                                interp_add_ins (td, MINT_UNBOX);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->last_ins->data [0] = get_data_item_index (td, klass);
 
                                interp_emit_ldobj (td, klass);
@@ -5032,9 +5602,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                case CEE_THROW:
                        INLINE_FAILURE;
                        CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_THROW);
+                       interp_add_ins (td, MINT_THROW);
+                       interp_ins_set_sreg (td->last_ins, td->sp [-1].local);
                        link_bblocks = FALSE;
                        td->sp = td->stack;
+                       ++td->ip;
                        break;
                case CEE_LDFLDA: {
                        CHECK_STACK (td, 1);
@@ -5052,14 +5624,19 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                interp_add_ins (td, MINT_MONO_LDPTR);
                                td->last_ins->data [0] = get_data_item_index (td, klass);
                                push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+
                                interp_add_ins (td, MINT_MONO_LDPTR);
                                td->last_ins->data [0] = get_data_item_index (td, field);
                                push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+
                                interp_add_ins (td, MINT_LDC_I4);
                                WRITE32_INS (td->last_ins, 0, &offset);
                                push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
 #endif
 
                                MonoMethod *wrapper = mono_marshal_get_ldflda_wrapper (field->type);
@@ -5070,22 +5647,25 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 #endif
                        {
                                if (is_static) {
-                                       interp_add_ins (td, MINT_POP);
+                                       td->sp--;
                                        interp_emit_ldsflda (td, field, error);
                                        goto_if_nok (error, exit);
                                } else {
-                                       if ((td->sp - 1)->type == STACK_TYPE_O) {
+                                       td->sp--;
+                                       if (td->sp->type == STACK_TYPE_O) {
                                                interp_add_ins (td, MINT_LDFLDA);
                                        } else {
-                                               int sp_type = (td->sp - 1)->type;
+                                               int sp_type = td->sp->type;
                                                g_assert (sp_type == STACK_TYPE_MP || sp_type == STACK_TYPE_I);
                                                interp_add_ins (td, MINT_LDFLDA_UNSAFE);
                                        }
                                        td->last_ins->data [0] = m_class_is_valuetype (klass) ? field->offset - MONO_ABI_SIZEOF (MonoObject) : field->offset;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                                       push_simple_type (td, STACK_TYPE_MP);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                }
                                td->ip += 5;
                        }
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_MP);
                        break;
                }
                case CEE_LDFLD: {
@@ -5100,22 +5680,28 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        MonoClass *field_klass = mono_class_from_mono_type_internal (ftype);
                        mt = mint_type (m_class_get_byval_arg (field_klass));
                        int field_size = mono_class_value_size (field_klass, NULL);
-                       field_size = ALIGN_TO (field_size, MINT_VT_ALIGNMENT);
                        int obj_size = mono_class_value_size (klass, NULL);
                        obj_size = ALIGN_TO (obj_size, MINT_VT_ALIGNMENT);
 
 #ifndef DISABLE_REMOTING
-                       if ((m_class_get_marshalbyref (klass) && !(signature->hasthis && td->last_ins->opcode == MINT_LDLOC_O && td->last_ins->data [0] == 0)) ||
+                       if (m_class_get_marshalbyref (klass) ||
                                        mono_class_is_contextbound (klass) ||
                                        klass == mono_defaults.marshalbyrefobject_class) {
                                g_assert (!is_static);
                                interp_add_ins (td, mt == MINT_TYPE_VT ? MINT_LDRMFLD_VT :  MINT_LDRMFLD);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                td->last_ins->data [0] = get_data_item_index (td, field);
+                               if (mt == MINT_TYPE_VT)
+                                       push_type_vt (td, field_klass, field_size);
+                               else
+                                       push_type (td, stack_type [mt], field_klass);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        } else
 #endif
                        {
                                if (is_static) {
-                                       interp_add_ins (td, MINT_POP);
+                                       td->sp--;
                                        interp_emit_sfld_access (td, field, field_klass, mt, TRUE, error);
                                        goto_if_nok (error, exit);
                                } else if (td->sp [-1].type == STACK_TYPE_VT) {
@@ -5131,10 +5717,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 #endif
                                        interp_add_ins (td, opcode);
                                        g_assert (m_class_is_valuetype (klass));
+                                       td->sp--;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                        td->last_ins->data [0] = field->offset - MONO_ABI_SIZEOF (MonoObject);
-                                       td->last_ins->data [1] = obj_size;
                                        if (mt == MINT_TYPE_VT)
-                                               td->last_ins->data [2] = field_size;
+                                               td->last_ins->data [1] = field_size;
+                                       if (mt == MINT_TYPE_VT)
+                                               push_type_vt (td, field_klass, field_size);
+                                       else
+                                               push_type (td, stack_type [mt], field_klass);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                } else {
                                        int opcode = MINT_LDFLD_I1 + mt - MINT_TYPE_I1;
 #ifdef NO_UNALIGNED_ACCESS
@@ -5142,18 +5734,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                                opcode = get_unaligned_opcode (opcode);
 #endif
                                        interp_add_ins (td, opcode);
+                                       td->sp--;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                        td->last_ins->data [0] = m_class_is_valuetype (klass) ? field->offset - MONO_ABI_SIZEOF (MonoObject) : field->offset;
                                        if (mt == MINT_TYPE_VT) {
                                                int size = mono_class_value_size (field_klass, NULL);
-                                               WRITE32_INS (td->last_ins, 1, &size);
+                                               g_assert (size < G_MAXUINT16);
+                                               td->last_ins->data [1] = size;
                                        }
+                                       if (mt == MINT_TYPE_VT)
+                                               push_type_vt (td, field_klass, field_size);
+                                       else
+                                               push_type (td, stack_type [mt], field_klass);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                }
                        }
-                       td->sp--;
-                       if (mt == MINT_TYPE_VT)
-                               push_type_vt (td, field_klass, field_size);
-                       else
-                               push_type (td, stack_type [mt], field_klass);
                        td->ip += 5;
                        BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_ACQ);
                        break;
@@ -5175,6 +5770,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        if (m_class_get_marshalbyref (klass)) {
                                g_assert (!is_static);
                                interp_add_ins (td, mt == MINT_TYPE_VT ? MINT_STRMFLD_VT : MINT_STRMFLD);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                td->last_ins->data [0] = get_data_item_index (td, field);
                        } else
 #endif
@@ -5184,7 +5781,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        goto_if_nok (error, exit);
 
                                        /* pop the unused object reference */
-                                       interp_add_ins (td, MINT_POP);
+                                       td->sp--;
 
                                        /* the vtable of the field might not be initialized at this point */
                                        mono_class_vtable_checked (domain, field_klass, error);
@@ -5196,6 +5793,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                                opcode = get_unaligned_opcode (opcode);
 #endif
                                        interp_add_ins (td, opcode);
+                                       td->sp -= 2;
+                                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                        td->last_ins->data [0] = m_class_is_valuetype (klass) ? field->offset - MONO_ABI_SIZEOF (MonoObject) : field->offset;
                                        if (mt == MINT_TYPE_VT) {
                                                /* the vtable of the field might not be initialized at this point */
@@ -5211,7 +5810,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                }
                        }
                        td->ip += 5;
-                       td->sp -= 2;
                        break;
                }
                case CEE_LDSFLDA: {
@@ -5221,7 +5819,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        interp_emit_ldsflda (td, field, error);
                        goto_if_nok (error, exit);
                        td->ip += 5;
-                       push_simple_type (td, STACK_TYPE_MP);
                        break;
                }
                case CEE_LDSFLD: {
@@ -5239,6 +5836,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        {
                                interp_add_ins (td, (TARGET_BYTE_ORDER == G_LITTLE_ENDIAN) ? MINT_LDC_I4_1 : MINT_LDC_I4_0);
                                push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 5;
                                break;
                        }
@@ -5246,12 +5844,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        interp_emit_sfld_access (td, field, klass, mt, TRUE, error);
                        goto_if_nok (error, exit);
 
-                       if (mt == MINT_TYPE_VT) {
-                               int size = mono_class_value_size (klass, NULL);
-                               push_type_vt (td, klass, size);
-                       } else {
-                               push_type (td, stack_type [mt], klass);
-                       }
                        td->ip += 5;
                        break;
                }
@@ -5274,7 +5866,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        goto_if_nok (error, exit);
 
                        td->ip += 5;
-                       --td->sp;
                        break;
                }
                case CEE_STOBJ: {
@@ -5299,29 +5890,28 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R8:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_OVF_I8_UN_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_OVF_I8_UN_R8);
 #else
-                               interp_add_ins (td, MINT_CONV_OVF_I4_UN_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_OVF_I4_UN_R8);
 #endif
                                break;
                        case STACK_TYPE_I8:
 #if SIZEOF_VOID_P == 4
-                               interp_add_ins (td, MINT_CONV_OVF_I4_U8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_OVF_I4_U8);
 #endif
                                break;
                        case STACK_TYPE_I4:
 #if SIZEOF_VOID_P == 8
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_I8_U4);
 #elif SIZEOF_VOID_P == 4
                                if (*td->ip == CEE_CONV_OVF_I_UN)
-                                       interp_add_ins (td, MINT_CONV_OVF_I4_U4);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I, MINT_CONV_OVF_I4_U4);
 #endif
                                break;
                        default:
                                g_assert_not_reached ();
                                break;
                        }
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
                        ++td->ip;
                        break;
                case CEE_CONV_OVF_I8_UN:
@@ -5329,23 +5919,22 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_I8_UN_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_I8_UN_R8);
                                break;
                        case STACK_TYPE_I8:
                                if (*td->ip == CEE_CONV_OVF_I8_UN)
-                                       interp_add_ins (td, MINT_CONV_OVF_I8_U8);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_I8_U8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_I8_U4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_U4);
                                break;
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_I8_UN_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_I8_UN_R4);
                                break;
                        default:
                                g_assert_not_reached ();
                                break;
                        }
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
                        ++td->ip;
                        break;
                case CEE_BOX: {
@@ -5375,15 +5964,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                const gboolean vt = mint_type (m_class_get_byval_arg (klass)) == MINT_TYPE_VT;
 
                                if (td->sp [-1].type == STACK_TYPE_R8 && m_class_get_byval_arg (klass)->type == MONO_TYPE_R4)
-                                       interp_add_ins (td, MINT_CONV_R4_R8);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R4, MINT_CONV_R4_R8);
                                MonoVTable *vtable = mono_class_vtable_checked (domain, klass, error);
                                goto_if_nok (error, exit);
 
                                td->sp--;
                                interp_add_ins (td, vt ? MINT_BOX_VT : MINT_BOX);
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                td->last_ins->data [0] = get_data_item_index (td, vtable);
-                               td->last_ins->data [1] = 0;
                                push_type (td, STACK_TYPE_O, klass);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 5;
                        }
 
@@ -5406,26 +5996,32 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        unsigned char lentype = (td->sp - 1)->type;
                        if (lentype == STACK_TYPE_I8) {
                                /* mimic mini behaviour */
-                               interp_add_ins (td, MINT_CONV_OVF_U4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_I8);
                        } else {
                                g_assert (lentype == STACK_TYPE_I4);
-                               interp_add_ins (td, MINT_CONV_OVF_U4_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_I4);
                        }
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
+                       td->sp--;
                        interp_add_ins (td, MINT_NEWARR);
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_type (td, STACK_TYPE_O, array_class);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->last_ins->data [0] = get_data_item_index (td, vtable);
-                       SET_TYPE (td->sp - 1, STACK_TYPE_O, array_class);
                        td->ip += 5;
                        break;
                }
                case CEE_LDLEN:
                        CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_LDLEN);
+                       td->sp--;
+                       interp_add_ins (td, MINT_LDLEN);
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
 #ifdef MONO_BIG_ARRAYS
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I8);
+                       push_simple_type (td, STACK_TYPE_I8);
 #else
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_I4);
+                       push_simple_type (td, STACK_TYPE_I4);
 #endif
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       ++td->ip;
                        break;
                case CEE_LDELEMA: {
                        gint32 size;
@@ -5448,166 +6044,107 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                mono_class_setup_vtable (klass);
                                CHECK_TYPELOAD (klass);
                                interp_add_ins (td, MINT_LDELEMA_TC);
-                               td->last_ins->data [0] = 1;
-                               td->last_ins->data [1] = get_data_item_index (td, klass);
+                               td->sp -= 2;
+                               td->locals [td->sp [0].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+                               td->locals [td->sp [1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->locals [td->sp [-1].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+                               td->last_ins->data [0] = get_data_item_index (td, klass);
                        } else {
                                interp_add_ins (td, MINT_LDELEMA1);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                mono_class_init_internal (klass);
                                size = mono_class_array_element_size (klass);
-                               WRITE32_INS (td->last_ins, 0, &size);
+                               td->last_ins->data [0] = size;
                        }
 
                        readonly = FALSE;
 
                        td->ip += 5;
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_MP);
                        break;
                }
                case CEE_LDELEM_I1:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_I1);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_I1, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_U1:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_U1);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_U1, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_I2:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_I2);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_I2, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_U2:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_U2);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_U2, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_I4:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_I4);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_I4, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_U4:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_U4);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                       handle_ldelem (td, MINT_LDELEM_U4, STACK_TYPE_I4);
                        break;
                case CEE_LDELEM_I8:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_I8);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
+                       handle_ldelem (td, MINT_LDELEM_I8, STACK_TYPE_I8);
                        break;
                case CEE_LDELEM_I:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_I);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I);
+                       handle_ldelem (td, MINT_LDELEM_I, STACK_TYPE_I);
                        break;
                case CEE_LDELEM_R4:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_R4);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R4);
+                       handle_ldelem (td, MINT_LDELEM_R4, STACK_TYPE_R4);
                        break;
                case CEE_LDELEM_R8:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_R8);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R8);
+                       handle_ldelem (td, MINT_LDELEM_R8, STACK_TYPE_R8);
                        break;
                case CEE_LDELEM_REF:
-                       CHECK_STACK (td, 2);
-                       ENSURE_I4 (td, 1);
-                       SIMPLE_OP (td, MINT_LDELEM_REF);
-                       --td->sp;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_O);
+                       handle_ldelem (td, MINT_LDELEM_REF, STACK_TYPE_O);
                        break;
                case CEE_LDELEM:
-                       CHECK_STACK (td, 2);
                        token = read32 (td->ip + 1);
                        klass = mini_get_class (method, token, generic_context);
                        CHECK_TYPELOAD (klass);
                        switch (mint_type (m_class_get_byval_arg (klass))) {
                                case MINT_TYPE_I1:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_I1);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                                       handle_ldelem (td, MINT_LDELEM_I1, STACK_TYPE_I4);
                                        break;
                                case MINT_TYPE_U1:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_U1);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                                       handle_ldelem (td, MINT_LDELEM_U1, STACK_TYPE_I4);
                                        break;
                                case MINT_TYPE_U2:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_U2);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                                       handle_ldelem (td, MINT_LDELEM_U2, STACK_TYPE_I4);
                                        break;
                                case MINT_TYPE_I2:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_I2);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                                       handle_ldelem (td, MINT_LDELEM_I2, STACK_TYPE_I4);
                                        break;
                                case MINT_TYPE_I4:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_I4);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                                       handle_ldelem (td, MINT_LDELEM_I4, STACK_TYPE_I4);
                                        break;
                                case MINT_TYPE_I8:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_I8);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
+                                       handle_ldelem (td, MINT_LDELEM_I8, STACK_TYPE_I8);
                                        break;
                                case MINT_TYPE_R4:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_R4);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R4);
+                                       handle_ldelem (td, MINT_LDELEM_R4, STACK_TYPE_R4);
                                        break;
                                case MINT_TYPE_R8:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_R8);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_R8);
+                                       handle_ldelem (td, MINT_LDELEM_R8, STACK_TYPE_R8);
                                        break;
                                case MINT_TYPE_O:
-                                       ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_REF);
-                                       --td->sp;
-                                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_O);
+                                       handle_ldelem (td, MINT_LDELEM_REF, STACK_TYPE_O);
                                        break;
                                case MINT_TYPE_VT: {
                                        int size = mono_class_value_size (klass, NULL);
+                                       g_assert (size < G_MAXUINT16);
+
+                                       CHECK_STACK (td, 2);
                                        ENSURE_I4 (td, 1);
-                                       SIMPLE_OP (td, MINT_LDELEM_VT);
-                                       WRITE32_INS (td->last_ins, 0, &size);
+                                       interp_add_ins (td, MINT_LDELEM_VT);
                                        td->sp -= 2;
+                                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                        push_type_vt (td, klass, size);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                                       td->last_ins->data [0] = size;
+                                       ++td->ip;
                                        break;
                                }
                                default: {
@@ -5622,92 +6159,68 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        td->ip += 4;
                        break;
                case CEE_STELEM_I:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_I);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_I);
                        break;
                case CEE_STELEM_I1:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_I1);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_I1);
                        break;
                case CEE_STELEM_I2:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_I2);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_I2);
                        break;
                case CEE_STELEM_I4:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_I4);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_I4);
                        break;
                case CEE_STELEM_I8:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_I8);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_I8);
                        break;
                case CEE_STELEM_R4:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_R4);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_R4);
                        break;
                case CEE_STELEM_R8:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_R8);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_R8);
                        break;
                case CEE_STELEM_REF:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
-                       SIMPLE_OP (td, MINT_STELEM_REF);
-                       td->sp -= 3;
+                       handle_stelem (td, MINT_STELEM_REF);
                        break;
                case CEE_STELEM:
-                       CHECK_STACK (td, 3);
-                       ENSURE_I4 (td, 2);
                        token = read32 (td->ip + 1);
                        klass = mini_get_class (method, token, generic_context);
                        CHECK_TYPELOAD (klass);
                        switch (mint_type (m_class_get_byval_arg (klass))) {
                                case MINT_TYPE_I1:
-                                       SIMPLE_OP (td, MINT_STELEM_I1);
+                                       handle_stelem (td, MINT_STELEM_I1);
                                        break;
                                case MINT_TYPE_U1:
-                                       SIMPLE_OP (td, MINT_STELEM_U1);
+                                       handle_stelem (td, MINT_STELEM_U1);
                                        break;
                                case MINT_TYPE_I2:
-                                       SIMPLE_OP (td, MINT_STELEM_I2);
+                                       handle_stelem (td, MINT_STELEM_I2);
                                        break;
                                case MINT_TYPE_U2:
-                                       SIMPLE_OP (td, MINT_STELEM_U2);
+                                       handle_stelem (td, MINT_STELEM_U2);
                                        break;
                                case MINT_TYPE_I4:
-                                       SIMPLE_OP (td, MINT_STELEM_I4);
+                                       handle_stelem (td, MINT_STELEM_I4);
                                        break;
                                case MINT_TYPE_I8:
-                                       SIMPLE_OP (td, MINT_STELEM_I8);
+                                       handle_stelem (td, MINT_STELEM_I8);
                                        break;
                                case MINT_TYPE_R4:
-                                       SIMPLE_OP (td, MINT_STELEM_R4);
+                                       handle_stelem (td, MINT_STELEM_R4);
                                        break;
                                case MINT_TYPE_R8:
-                                       SIMPLE_OP (td, MINT_STELEM_R8);
+                                       handle_stelem (td, MINT_STELEM_R8);
                                        break;
                                case MINT_TYPE_O:
-                                       SIMPLE_OP (td, MINT_STELEM_REF);
+                                       handle_stelem (td, MINT_STELEM_REF);
                                        break;
                                case MINT_TYPE_VT: {
                                        int size = mono_class_value_size (klass, NULL);
-                                       SIMPLE_OP (td, MINT_STELEM_VT);
+                                       g_assert (size < G_MAXUINT16);
+
+                                       handle_stelem (td, MINT_STELEM_VT);
                                        td->last_ins->data [0] = get_data_item_index (td, klass);
-                                       WRITE32_INS (td->last_ins, 1, &size);
+                                       td->last_ins->data [1] = size;
                                        break;
                                }
                                default: {
@@ -5720,20 +6233,15 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                }
                        }
                        td->ip += 4;
-                       td->sp -= 3;
                        break;
-#if 0
-               case CEE_CONV_OVF_U1:
-
-               case CEE_CONV_OVF_I8:
-
-#if SIZEOF_VOID_P == 8
-               case CEE_CONV_OVF_U:
-#endif
-#endif
                case CEE_CKFINITE:
                        CHECK_STACK (td, 1);
-                       SIMPLE_OP (td, MINT_CKFINITE);
+                       interp_add_ins (td, MINT_CKFINITE);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_simple_type (td, STACK_TYPE_R8);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       ++td->ip;
                        break;
                case CEE_MKREFANY:
                        CHECK_STACK (td, 1);
@@ -5743,11 +6251,13 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_TYPELOAD (klass);
 
                        interp_add_ins (td, MINT_MKREFANY);
+                       td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                       push_type_vt (td, mono_defaults.typed_reference_class, sizeof (MonoTypedRef));
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                        td->last_ins->data [0] = get_data_item_index (td, klass);
 
                        td->ip += 5;
-                       td->sp--;
-                       push_type_vt (td, mono_defaults.typed_reference_class, sizeof (MonoTypedRef));
                        break;
                case CEE_REFANYVAL: {
                        CHECK_STACK (td, 1);
@@ -5757,11 +6267,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_TYPELOAD (klass);
 
                        interp_add_ins (td, MINT_REFANYVAL);
-                       td->last_ins->data [0] = get_data_item_index (td, klass);
-
                        td->sp--;
+                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                        push_simple_type (td, STACK_TYPE_MP);
-                       SET_SIMPLE_TYPE (td->sp - 1, STACK_TYPE_MP);
+                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                       td->last_ins->data [0] = get_data_item_index (td, klass);
 
                        td->ip += 5;
                        break;
@@ -5772,22 +6282,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I1_UN_R4 : MINT_CONV_OVF_I1_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I1_UN_R4 : MINT_CONV_OVF_I1_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I1_UN_R8 : MINT_CONV_OVF_I1_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I1_UN_R8 : MINT_CONV_OVF_I1_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I1_U4 : MINT_CONV_OVF_I1_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I1_U4 : MINT_CONV_OVF_I1_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I1_U8 : MINT_CONV_OVF_I1_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I1_U8 : MINT_CONV_OVF_I1_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                }
                case CEE_CONV_OVF_U1:
@@ -5795,22 +6304,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_U1_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U1_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_U1_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U1_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_OVF_U1_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U1_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_OVF_U1_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U1_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                case CEE_CONV_OVF_I2:
                case CEE_CONV_OVF_I2_UN: {
@@ -5818,22 +6326,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I2_UN_R4 : MINT_CONV_OVF_I2_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I2_UN_R4 : MINT_CONV_OVF_I2_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I2_UN_R8 : MINT_CONV_OVF_I2_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I2_UN_R8 : MINT_CONV_OVF_I2_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I2_U4 : MINT_CONV_OVF_I2_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I2_U4 : MINT_CONV_OVF_I2_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, is_un ? MINT_CONV_OVF_I2_U8 : MINT_CONV_OVF_I2_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, is_un ? MINT_CONV_OVF_I2_U8 : MINT_CONV_OVF_I2_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
                }
                case CEE_CONV_OVF_U2_UN:
@@ -5841,22 +6348,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_U2_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U2_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_U2_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U2_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_OVF_U2_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U2_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_OVF_U2_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U2_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
 #if SIZEOF_VOID_P == 4
                case CEE_CONV_OVF_I:
@@ -5866,26 +6372,25 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_I4_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_I4_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_I4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_I4_R8);
                                break;
                        case STACK_TYPE_I4:
                                if (*td->ip == CEE_CONV_OVF_I4_UN)
-                                       interp_add_ins (td, MINT_CONV_OVF_I4_U4);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_I4_U4);
                                break;
                        case STACK_TYPE_I8:
                                if (*td->ip == CEE_CONV_OVF_I4_UN)
-                                       interp_add_ins (td, MINT_CONV_OVF_I4_U8);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_I4_U8);
                                else
-                                       interp_add_ins (td, MINT_CONV_OVF_I4_I8);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_I4_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
 #if SIZEOF_VOID_P == 4
                case CEE_CONV_OVF_U:
@@ -5895,26 +6400,25 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_U4_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_U4_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_R8);
                                break;
                        case STACK_TYPE_I4:
                                if (*td->ip != CEE_CONV_OVF_U4_UN)
-                                       interp_add_ins (td, MINT_CONV_OVF_U4_I4);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_OVF_U4_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_I8);
                                break;
                        case STACK_TYPE_MP:
-                               interp_add_ins (td, MINT_CONV_OVF_U4_P);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_OVF_U4_P);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
                        break;
 #if SIZEOF_VOID_P == 8
                case CEE_CONV_OVF_I:
@@ -5923,13 +6427,13 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_I8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_I8_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_I8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_I8_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_I8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_I8_I4);
                                break;
                        case STACK_TYPE_I8:
                                break;
@@ -5937,7 +6441,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
                        break;
 #if SIZEOF_VOID_P == 8
                case CEE_CONV_OVF_U:
@@ -5946,22 +6449,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        CHECK_STACK (td, 1);
                        switch (td->sp [-1].type) {
                        case STACK_TYPE_R4:
-                               interp_add_ins (td, MINT_CONV_OVF_U8_R4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_U8_R4);
                                break;
                        case STACK_TYPE_R8:
-                               interp_add_ins (td, MINT_CONV_OVF_U8_R8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_U8_R8);
                                break;
                        case STACK_TYPE_I4:
-                               interp_add_ins (td, MINT_CONV_OVF_U8_I4);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_U8_I4);
                                break;
                        case STACK_TYPE_I8:
-                               interp_add_ins (td, MINT_CONV_OVF_U8_I8);
+                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I8, MINT_CONV_OVF_U8_I8);
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        ++td->ip;
-                       SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I8);
                        break;
                case CEE_LDTOKEN: {
                        int size;
@@ -6011,6 +6513,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        else
                                                interp_add_ins (td, MINT_LDC_I4_0);
                                        push_simple_type (td, STACK_TYPE_I4);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                        td->ip = next_next_ip + 5;
                                        break;
                                }
@@ -6018,13 +6521,15 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                interp_add_ins (td, MINT_MONO_LDPTR);
                                gpointer systype = mono_type_get_object_checked (domain, (MonoType*)handle, error);
                                goto_if_nok (error, exit);
-                               td->last_ins->data [0] = get_data_item_index (td, systype);
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = get_data_item_index (td, systype);
                                td->ip = next_ip + 5;
                        } else {
                                interp_add_ins (td, MINT_LDTOKEN);
-                               td->last_ins->data [0] = get_data_item_index (td, handle);
                                push_type_vt (td, klass, sizeof (gpointer));
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = get_data_item_index (td, handle);
                                td->ip += 5;
                        }
 
@@ -6057,9 +6562,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                case CEE_ENDFINALLY: {
                        g_assert (td->clause_indexes [in_offset] != -1);
                        td->sp = td->stack;
-                       SIMPLE_OP (td, MINT_ENDFINALLY);
+                       interp_add_ins (td, MINT_ENDFINALLY);
                        td->last_ins->data [0] = td->clause_indexes [in_offset];
                        link_bblocks = FALSE;
+                       ++td->ip;
                        break;
                }
                case CEE_LEAVE:
@@ -6107,64 +6613,82 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        switch (*td->ip) {
                                case CEE_MONO_RETHROW:
                                        CHECK_STACK (td, 1);
-                                       SIMPLE_OP (td, MINT_MONO_RETHROW);
+                                       interp_add_ins (td, MINT_MONO_RETHROW);
+                                       td->sp--;
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                        td->sp = td->stack;
+                                       ++td->ip;
                                        break;
 
                                case CEE_MONO_LD_DELEGATE_METHOD_PTR:
                                        --td->sp;
                                        td->ip += 1;
                                        interp_add_ins (td, MINT_LD_DELEGATE_METHOD_PTR);
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                        push_simple_type (td, STACK_TYPE_I);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);          
                                        break;
-                               case CEE_MONO_CALLI_EXTRA_ARG:
+                               case CEE_MONO_CALLI_EXTRA_ARG: {
+                                       int saved_local = td->sp [-1].local;
                                        /* Same as CEE_CALLI, except that we drop the extra arg required for llvm specific behaviour */
-                                       interp_add_ins (td, MINT_POP1);
-                                       --td->sp;
+                                       td->sp -= 2;
+                                       StackInfo tos = td->sp [1];
+
+                                       // Push back to top of stack and fixup the local offset
+                                       push_types (td, &tos, 1);
+                                       td->sp [-1].local = saved_local;
+                                       td->locals [saved_local].stack_offset = td->sp [-1].offset;
+
                                        if (!interp_transform_call (td, method, NULL, domain, generic_context, NULL, FALSE, error, FALSE, FALSE, FALSE))
                                                goto exit;
                                        break;
+                               }
                                case CEE_MONO_JIT_ICALL_ADDR: {
                                        const guint32 token = read32 (td->ip + 1);
                                        td->ip += 5;
                                        const gconstpointer func = mono_find_jit_icall_info ((MonoJitICallId)token)->func;
 
                                        interp_add_ins (td, MINT_LDFTN);
-                                       td->last_ins->data [0] = get_data_item_index (td, (gpointer)func);
                                        push_simple_type (td, STACK_TYPE_I);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                                       td->last_ins->data [0] = get_data_item_index (td, (gpointer)func);
                                        break;
                                }
                                case CEE_MONO_ICALL: {
+                                       int dreg;
                                        MonoJitICallId const jit_icall_id = (MonoJitICallId)read32 (td->ip + 1);
                                        MonoJitICallInfo const * const info = mono_find_jit_icall_info (jit_icall_id);
                                        td->ip += 5;
 
                                        CHECK_STACK (td, info->sig->param_count);
+                                       td->sp -= info->sig->param_count;
+                                       for (int i = 0; i < info->sig->param_count; i++)
+                                               td->locals [td->sp [i].local].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+                                       if (!MONO_TYPE_IS_VOID (info->sig->ret)) {
+                                               int mt = mint_type (info->sig->ret);
+                                               push_simple_type (td, stack_type [mt]);
+                                               dreg = td->sp [-1].local;
+                                               td->locals [dreg].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
+                                       } else {
+                                               // Create a new dummy local to serve as the dreg of the call
+                                               // This dreg is only used to resolve the call args offset
+                                               push_simple_type (td, STACK_TYPE_I4);
+                                               td->sp--;
+                                               dreg = td->sp [0].local;
+                                       }
                                        if (jit_icall_id == MONO_JIT_ICALL_mono_threads_attach_coop) {
                                                rtm->needs_thread_attach = 1;
-
-                                               /* attach needs two arguments, and has one return value: leave one element on the stack */
-                                               interp_add_ins (td, MINT_POP);
                                        } else if (jit_icall_id == MONO_JIT_ICALL_mono_threads_detach_coop) {
                                                g_assert (rtm->needs_thread_attach);
-
-                                               /* detach consumes two arguments, and no return value: drop both of them */
-                                               interp_add_ins (td, MINT_POP);
-                                               interp_add_ins (td, MINT_POP);
                                        } else {
                                                int const icall_op = interp_icall_op_for_sig (info->sig);
                                                g_assert (icall_op != -1);
 
                                                interp_add_ins (td, icall_op);
                                                // hash here is overkill
+                                               interp_ins_set_dreg (td->last_ins, dreg);
                                                td->last_ins->data [0] = get_data_item_index (td, (gpointer)info->func);
                                        }
-                                       td->sp -= info->sig->param_count;
-
-                                       if (!MONO_TYPE_IS_VOID (info->sig->ret)) {
-                                               int mt = mint_type (info->sig->ret);
-                                               push_simple_type (td, stack_type [mt]);
-                                       }
                                        break;
                                }
                        case CEE_MONO_VTADDR: {
@@ -6177,14 +6701,17 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        size = mono_class_value_size (klass, NULL);
 
                                int local = create_interp_local_explicit (td, m_class_get_byval_arg (klass), size);
-                               interp_add_ins (td, MINT_STLOC_VT);
-                               td->last_ins->data [0] = local;
-                               WRITE32_INS (td->last_ins, 1, &size);
+                               interp_add_ins (td, MINT_MOV_VT);
                                td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               interp_ins_set_dreg (td->last_ins, local);
+                               td->last_ins->data [0] = size;
 
                                interp_add_ins (td, MINT_LDLOCA_S);
-                               td->last_ins->data [0] = local;
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               interp_ins_set_sreg (td->last_ins, local);
+                               td->locals [local].indirects++;
 
                                ++td->ip;
                                break;
@@ -6195,18 +6722,20 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                token = read32 (td->ip + 1);
                                td->ip += 5;
                                interp_add_ins (td, MINT_MONO_LDPTR);
-                               td->last_ins->data [0] = get_data_item_index (td, mono_method_get_wrapper_data (method, token));
                                push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = get_data_item_index (td, mono_method_get_wrapper_data (method, token));
                                break;
                        case CEE_MONO_PINVOKE_ADDR_CACHE: {
                                token = read32 (td->ip + 1);
                                td->ip += 5;
                                interp_add_ins (td, MINT_MONO_LDPTR);
                                g_assert (method->wrapper_type != MONO_WRAPPER_NONE);
+                               push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                /* This is a memory slot used by the wrapper */
                                gpointer addr = mono_mem_manager_alloc0 (td->mem_manager, sizeof (gpointer));
                                td->last_ins->data [0] = get_data_item_index (td, addr);
-                               push_simple_type (td, STACK_TYPE_I);
                                break;
                        }
                        case CEE_MONO_OBJADDR:
@@ -6219,8 +6748,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                token = read32 (td->ip + 1);
                                td->ip += 5;
                                interp_add_ins (td, MINT_MONO_NEWOBJ);
-                               td->last_ins->data [0] = get_data_item_index (td, mono_method_get_wrapper_data (method, token));
                                push_simple_type (td, STACK_TYPE_O);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = get_data_item_index (td, mono_method_get_wrapper_data (method, token));
                                break;
                        case CEE_MONO_RETOBJ:
                                CHECK_STACK (td, 1);
@@ -6228,7 +6758,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                td->ip += 5;
                                interp_add_ins (td, MINT_MONO_RETOBJ);
                                td->sp--;
-
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                klass = (MonoClass *)mono_method_get_wrapper_data (method, token);
                                
                                /*stackval_from_data (signature->ret, frame->retval, sp->data.vt, signature->pinvoke);*/
@@ -6245,8 +6775,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                                int size = mono_class_native_size (klass, NULL);
                                interp_add_ins (td, MINT_LDOBJ_VT);
-                               WRITE32_INS (td->last_ins, 0, &size);
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                push_type_vt (td, klass, size);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = size;
                                break;
                        }
                        case CEE_MONO_TLS: {
@@ -6255,13 +6787,15 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                g_assertf (key == TLS_KEY_SGEN_THREAD_INFO, "%d", key);
                                interp_add_ins (td, MINT_MONO_SGEN_THREAD_INFO);
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                break;
                        }
                        case CEE_MONO_ATOMIC_STORE_I4:
                                CHECK_STACK (td, 2);
-                               SIMPLE_OP (td, MINT_MONO_ATOMIC_STORE_I4);
+                               interp_add_ins (td, MINT_MONO_ATOMIC_STORE_I4);
                                td->sp -= 2;
-                               td->ip++;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               td->ip += 2;
                                break;
                        case CEE_MONO_SAVE_LMF:
                        case CEE_MONO_RESTORE_LMF:
@@ -6270,8 +6804,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                break;
                        case CEE_MONO_LDPTR_INT_REQ_FLAG:
                                interp_add_ins (td, MINT_MONO_LDPTR);
-                               td->last_ins->data [0] = get_data_item_index (td, &mono_thread_interruption_request_flag);
                                push_type (td, STACK_TYPE_MP, NULL);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               td->last_ins->data [0] = get_data_item_index (td, &mono_thread_interruption_request_flag);
                                ++td->ip;
                                break;
                        case CEE_MONO_MEMORY_BARRIER:
@@ -6281,6 +6816,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        case CEE_MONO_LDDOMAIN:
                                interp_add_ins (td, MINT_MONO_LDDOMAIN);
                                push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_MONO_SAVE_LAST_ERROR:
@@ -6290,6 +6826,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        case CEE_MONO_GET_SP:
                                interp_add_ins (td, MINT_MONO_GET_SP);
                                push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        default:
@@ -6323,13 +6860,15 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_add_ins (td, MINT_CEQ_I4 + STACK_TYPE_I - STACK_TYPE_I4);
                                } else {
                                        if (td->sp [-1].type == STACK_TYPE_R4 && td->sp [-2].type == STACK_TYPE_R8)
-                                               interp_add_ins (td, MINT_CONV_R8_R4);
+                                               interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                                        if (td->sp [-1].type == STACK_TYPE_R8 && td->sp [-2].type == STACK_TYPE_R4)
-                                               interp_add_ins (td, MINT_CONV_R8_R4_SP);
+                                               interp_add_conv (td, td->sp - 2, NULL, STACK_TYPE_R8, MINT_CONV_R8_R4);
                                        interp_add_ins (td, MINT_CEQ_I4 + td->sp [-1].type - STACK_TYPE_I4);
                                }
-                               --td->sp;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_CGT:
@@ -6338,8 +6877,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_add_ins (td, MINT_CGT_I4 + STACK_TYPE_I - STACK_TYPE_I4);
                                else
                                        interp_add_ins (td, MINT_CGT_I4 + td->sp [-1].type - STACK_TYPE_I4);
-                               --td->sp;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_CGT_UN:
@@ -6348,8 +6889,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_add_ins (td, MINT_CGT_UN_I4 + STACK_TYPE_I - STACK_TYPE_I4);
                                else
                                        interp_add_ins (td, MINT_CGT_UN_I4 + td->sp [-1].type - STACK_TYPE_I4);
-                               --td->sp;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_CLT:
@@ -6358,8 +6901,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_add_ins (td, MINT_CLT_I4 + STACK_TYPE_I - STACK_TYPE_I4);
                                else
                                        interp_add_ins (td, MINT_CLT_I4 + td->sp [-1].type - STACK_TYPE_I4);
-                               --td->sp;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_CLT_UN:
@@ -6368,17 +6913,15 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        interp_add_ins (td, MINT_CLT_UN_I4 + STACK_TYPE_I - STACK_TYPE_I4);
                                else
                                        interp_add_ins (td, MINT_CLT_UN_I4 + td->sp [-1].type - STACK_TYPE_I4);
-                               --td->sp;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_I4);
+                               td->sp -= 2;
+                               interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
+                               push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
                                break;
                        case CEE_LDVIRTFTN: /* fallthrough */
                        case CEE_LDFTN: {
                                MonoMethod *m;
-                               if (*td->ip == CEE_LDVIRTFTN) {
-                                       CHECK_STACK (td, 1);
-                                       --td->sp;
-                               }
                                token = read32 (td->ip + 1);
                                m = interp_get_method (method, token, image, generic_context, error);
                                goto_if_nok (error, exit);
@@ -6396,8 +6939,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        if (m->flags & METHOD_ATTRIBUTE_PINVOKE_IMPL) {
                                                interp_generate_not_supported_throw (td);
                                                interp_add_ins (td, MINT_LDNULL);
-                                               td->ip += 5;
                                                push_simple_type (td, STACK_TYPE_MP);
+                                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                                               td->ip += 5;
                                                break;
                                        }
 
@@ -6428,6 +6972,8 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                                interp_generate_ipe_throw_with_msg (td, wrapper_error);
                                                mono_interp_error_cleanup (wrapper_error);
                                                interp_add_ins (td, MINT_LDNULL);
+                                               push_simple_type (td, STACK_TYPE_MP);
+                                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                        } else {
                                                /* push a pointer to a trampoline that calls m */
                                                gpointer entry = mini_get_interp_callbacks ()->create_method_pointer (m, TRUE, error);
@@ -6438,17 +6984,29 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                                interp_add_ins (td, MINT_LDC_I4);
                                                WRITE32_INS (td->last_ins, 0, &entry);
 #endif
+                                               push_simple_type (td, STACK_TYPE_MP);
+                                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                        }
                                        td->ip += 5;
-                                       push_simple_type (td, STACK_TYPE_MP);
                                        break;
                                }
-
-                               interp_add_ins (td, *td->ip == CEE_LDFTN ? MINT_LDFTN : MINT_LDVIRTFTN);
-                               td->last_ins->data [0] = get_data_item_index (td, mono_interp_get_imethod (domain, m, error));
+                       
+                               int index = get_data_item_index (td, mono_interp_get_imethod (domain, m, error));
                                goto_if_nok (error, exit);
-                               td->ip += 5;
+                               if (*td->ip == CEE_LDVIRTFTN) {
+                                       CHECK_STACK (td, 1);
+                                       --td->sp;
+                                       interp_add_ins (td, MINT_LDVIRTFTN);
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                                       td->last_ins->data [0] = index;
+                               } else {
+                                       interp_add_ins (td, MINT_LDFTN);
+                                       td->last_ins->data [0] = index;
+                               }
                                push_simple_type (td, STACK_TYPE_F);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+
+                               td->ip += 5;
                                break;
                        }
                        case CEE_LDARG: {
@@ -6465,15 +7023,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 
                                if (!inlining) {
                                        interp_add_ins (td, MINT_LDLOCA_S);
-                                       td->last_ins->data [0] = n;
+                                       interp_ins_set_sreg (td->last_ins, n);
                                        td->locals [n].indirects++;
                                } else {
                                        int loc_n = arg_locals [n];
                                        interp_add_ins (td, MINT_LDLOCA_S);
-                                       td->last_ins->data [0] = loc_n;
+                                       interp_ins_set_sreg (td->last_ins, loc_n); /* when inlining, the arg lives in the remapped local arg_locals [n] */
                                        td->locals [loc_n].indirects++;
                                }
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 3;
                                break;
                        }
@@ -6502,9 +7061,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        loc_n += num_args;
                                else
                                        loc_n = local_locals [loc_n];
-                               td->last_ins->data [0] = loc_n;
+                               interp_ins_set_sreg (td->last_ins, loc_n);
                                td->locals [loc_n].indirects++;
                                push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->ip += 3;
                                break;
                        }
@@ -6522,21 +7082,26 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                CHECK_STACK (td, 1);
 #if SIZEOF_VOID_P == 8
                                if (td->sp [-1].type == STACK_TYPE_I8)
-                                       interp_add_ins (td, MINT_CONV_I4_I8);
+                                       interp_add_conv (td, td->sp - 1, NULL, STACK_TYPE_I4, MINT_CONV_I4_I8);
 #endif                         
                                interp_add_ins (td, MINT_LOCALLOC);
                                if (td->sp != td->stack + 1)
                                        g_warning("CEE_LOCALLOC: stack not empty");
-                               ++td->ip;
-                               SET_SIMPLE_TYPE(td->sp - 1, STACK_TYPE_MP);
+                               td->sp--;
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_MP);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                td->has_localloc = TRUE;
+                               ++td->ip;
                                break;
 #if 0
                        case CEE_UNUSED57: ves_abort(); break;
 #endif
                        case CEE_ENDFILTER:
                                interp_add_ins (td, MINT_ENDFILTER);
+                               interp_ins_set_sreg (td->last_ins, td->sp [-1].local);
                                ++td->ip;
+                               link_bblocks = FALSE;
                                break;
                        case CEE_UNALIGNED_:
                                td->ip += 2;
@@ -6556,15 +7121,20 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                klass = mini_get_class (method, token, generic_context);
                                CHECK_TYPELOAD (klass);
                                if (m_class_is_valuetype (klass)) {
+                                       --td->sp;
                                        interp_add_ins (td, MINT_INITOBJ);
+                                       interp_ins_set_sreg (td->last_ins, td->sp [0].local);
                                        i32 = mono_class_value_size (klass, NULL);
-                                       WRITE32_INS (td->last_ins, 0, &i32);
-                                       --td->sp;
+                                       g_assert (i32 < G_MAXUINT16);
+                                       td->last_ins->data [0] = i32;
                                } else {
                                        interp_add_ins (td, MINT_LDNULL);
                                        push_type (td, STACK_TYPE_O, NULL);
+                                       interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+
                                        interp_add_ins (td, MINT_STIND_REF);
                                        td->sp -= 2;
+                                       interp_ins_set_sregs2 (td->last_ins, td->sp [0].local, td->sp [1].local);
                                }
                                td->ip += 5;
                                break;
@@ -6574,8 +7144,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                if (volatile_)
                                        interp_add_ins (td, MINT_MONO_MEMORY_BARRIER);
                                interp_add_ins (td, MINT_CPBLK);
-                               BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_SEQ);
                                td->sp -= 3;
+                               interp_ins_set_sregs3 (td->last_ins, td->sp [0].local, td->sp [1].local, td->sp [2].local);
+                               BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_SEQ);
                                ++td->ip;
                                break;
                        case CEE_READONLY_:
@@ -6593,6 +7164,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                BARRIER_IF_VOLATILE (td, MONO_MEMORY_BARRIER_REL);
                                interp_add_ins (td, MINT_INITBLK);
                                td->sp -= 3;
+                               interp_ins_set_sregs3 (td->last_ins, td->sp [0].local, td->sp [1].local, td->sp [2].local);
                                td->ip += 1;
                                break;
                        case CEE_NO_:
@@ -6602,10 +7174,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        case CEE_RETHROW: {
                                int clause_index = td->clause_indexes [in_offset];
                                g_assert (clause_index != -1);
-                               SIMPLE_OP (td, MINT_RETHROW);
+                               interp_add_ins (td, MINT_RETHROW);
                                td->last_ins->data [0] = rtm->clause_data_offsets [clause_index];
                                td->sp = td->stack;
                                link_bblocks = FALSE;
+                               ++td->ip;
                                break;
                        }
                        case CEE_SIZEOF: {
@@ -6630,13 +7203,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                interp_add_ins (td, MINT_LDC_I4);
                                WRITE32_INS (td->last_ins, 0, &size);
                                push_simple_type (td, STACK_TYPE_I4);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
                                break;
                        }
                        case CEE_REFANYTYPE:
                                interp_add_ins (td, MINT_REFANYTYPE);
-                               td->ip += 1;
                                td->sp--;
-                               push_type_vt (td, NULL, sizeof (gpointer));
+                               interp_ins_set_sreg (td->last_ins, td->sp [0].local);
+                               push_simple_type (td, STACK_TYPE_I);
+                               interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+                               ++td->ip;
                                break;
                        default:
                                g_error ("transform.c: Unimplemented opcode: 0xFE %02x (%s) at 0x%x\n", *td->ip, mono_opcode_name (256 + *td->ip), td->ip-header->code);
@@ -6696,15 +7272,15 @@ handle_relocations (TransformData *td)
 
                switch (reloc->type) {
                case RELOC_SHORT_BRANCH:
-                       g_assert (td->new_code [reloc->offset + 1] == 0xdead);
-                       td->new_code [reloc->offset + 1] = offset;
+                       g_assert (td->new_code [reloc->offset + reloc->skip + 1] == 0xdead);
+                       td->new_code [reloc->offset + reloc->skip + 1] = offset;
                        break;
                case RELOC_LONG_BRANCH: {
                        guint16 *v = (guint16 *) &offset;
-                       g_assert (td->new_code [reloc->offset + 1] == 0xdead);
-                       g_assert (td->new_code [reloc->offset + 2] == 0xbeef);
-                       td->new_code [reloc->offset + 1] = *(guint16 *) v;
-                       td->new_code [reloc->offset + 2] = *(guint16 *) (v + 1);
+                       g_assert (td->new_code [reloc->offset + reloc->skip + 1] == 0xdead);
+                       g_assert (td->new_code [reloc->offset + reloc->skip + 2] == 0xbeef);
+                       td->new_code [reloc->offset + reloc->skip + 1] = *(guint16 *) v;
+                       td->new_code [reloc->offset + reloc->skip + 2] = *(guint16 *) (v + 1);
                        break;
                }
                case RELOC_SWITCH: {
@@ -6722,7 +7298,6 @@ handle_relocations (TransformData *td)
        }
 }
 
-
 static int
 get_inst_length (InterpInst *ins)
 {
@@ -6736,96 +7311,6 @@ get_inst_length (InterpInst *ins)
                return mono_interp_oplen [ins->opcode];
 }
 
-static void
-get_inst_stack_usage (TransformData *td, InterpInst *ins, int *pop, int *push)
-{
-       guint16 opcode = ins->opcode;
-       if (mono_interp_oppop [opcode] == MINT_VAR_POP ||
-                       mono_interp_oppush [opcode] == MINT_VAR_PUSH) {
-               switch (opcode) {
-               case MINT_JIT_CALL:
-               case MINT_CALL:
-               case MINT_CALLVIRT:
-               case MINT_CALLVIRT_FAST: {
-                       InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]];
-                       *pop = imethod->param_count + imethod->hasthis;
-                       *push = imethod->rtype->type != MONO_TYPE_VOID;
-                       break;
-               }
-#ifndef ENABLE_NETCORE
-               case MINT_CALLRUN: {
-                       MonoMethodSignature *csignature = (MonoMethodSignature*) td->data_items [ins->data [1]];
-                       *pop = csignature->param_count + csignature->hasthis;
-                       *push = csignature->ret->type != MONO_TYPE_VOID;
-                       break;
-               }
-#endif
-               case MINT_CALL_DELEGATE: {
-                       MonoMethodSignature *csignature = (MonoMethodSignature*) td->data_items [ins->data [0]];
-                       *pop = csignature->param_count + 1;
-                       *push = csignature->ret->type != MONO_TYPE_VOID;
-                       break;
-               }
-               case MINT_CALLI:
-               case MINT_CALLI_NAT:
-               case MINT_CALLI_NAT_DYNAMIC:
-               case MINT_CALLI_NAT_FAST: {
-                       MonoMethodSignature *csignature = (MonoMethodSignature*) td->data_items [ins->data [0]];
-                       *pop = csignature->param_count + csignature->hasthis + 1;
-                       *push = csignature->ret->type != MONO_TYPE_VOID;
-                       break;
-               }
-               case MINT_CALL_VARARG: {
-                       InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]];
-                       MonoMethodSignature *csignature = (MonoMethodSignature*) td->data_items [ins->data [2]];
-                       *pop = imethod->param_count + imethod->hasthis + csignature->param_count - csignature->sentinelpos;
-                       *push = imethod->rtype->type != MONO_TYPE_VOID;
-                       break;
-               }
-               case MINT_NEWOBJ_VT_FAST:
-               case MINT_NEWOBJ_FAST: {
-                       gboolean is_inlined = ins->data [0] == INLINED_METHOD_FLAG;
-                       if (is_inlined) {
-                               // This needs to be handled explictly during cprop, in order to properly
-                               // keep track of stack contents
-                               *pop = 0;
-                               *push = 2;
-                       } else {
-                               InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]];
-                               *pop = imethod->param_count;
-                               *push = 1;
-                       }
-                       break;
-               }
-               case MINT_NEWOBJ_ARRAY:
-                       *pop = ins->data [1];
-                       *push = 1;
-                       break;
-               case MINT_NEWOBJ_STRING: {
-                       InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]];
-                       *pop = imethod->param_count;
-                       *push = 1;
-                       break;
-               }
-               case MINT_LDELEMA:
-               case MINT_LDELEMA_TC:
-                       *pop = ins->data [0] + 1;
-                       *push = 1;
-                       break;
-               case MINT_NEWOBJ: {
-                       InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]];
-                       *pop = imethod->param_count;
-                       *push = 1;
-                       break;
-               }
-               default:
-                       g_assert_not_reached ();
-               }
-       } else {
-               *pop = mono_interp_oppop [opcode];
-               *push = mono_interp_oppush [opcode];
-       }
-}
 
 static guint16*
 emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *ins)
@@ -6851,6 +7336,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
        *ip++ = opcode;
        if (opcode == MINT_SWITCH) {
                int labels = READ32 (&ins->data [0]);
+               *ip++ = get_interp_local_offset (td, ins->sregs [0], TRUE);
                // Write number of switch labels
                *ip++ = ins->data [0];
                *ip++ = ins->data [1];
@@ -6868,6 +7354,8 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
                        (opcode >= MINT_BEQ_I4_S && opcode <= MINT_BLT_UN_R8_S) ||
                        opcode == MINT_BR_S || opcode == MINT_LEAVE_S || opcode == MINT_LEAVE_S_CHECK || opcode == MINT_CALL_HANDLER_S) {
                const int br_offset = start_ip - td->new_code;
+               for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                       *ip++ = get_interp_local_offset (td, ins->sregs [i], TRUE);
                if (ins->info.target_bb->native_offset >= 0) {
                        // Backwards branch. We can already patch it.
                        *ip++ = ins->info.target_bb->native_offset - br_offset;
@@ -6875,6 +7363,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
                        // We don't know the in_offset of the target, add a reloc
                        Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
                        reloc->type = RELOC_SHORT_BRANCH;
+                       reloc->skip = mono_interp_op_sregs [opcode];
                        reloc->offset = br_offset;
                        reloc->target_bb = ins->info.target_bb;
                        g_ptr_array_add (td->relocs, reloc);
@@ -6886,6 +7375,8 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
                        (opcode >= MINT_BEQ_I4 && opcode <= MINT_BLT_UN_R8) ||
                        opcode == MINT_BR || opcode == MINT_LEAVE || opcode == MINT_LEAVE_CHECK || opcode == MINT_CALL_HANDLER) {
                const int br_offset = start_ip - td->new_code;
+               for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                       *ip++ = get_interp_local_offset (td, ins->sregs [i], TRUE);
                if (ins->info.target_bb->native_offset >= 0) {
                        // Backwards branch. We can already patch it
                        int target_offset = ins->info.target_bb->native_offset - br_offset;
@@ -6893,6 +7384,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
                } else {
                        Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
                        reloc->type = RELOC_LONG_BRANCH;
+                       reloc->skip = mono_interp_op_sregs [opcode];
                        reloc->offset = br_offset;
                        reloc->target_bb = ins->info.target_bb;
                        g_ptr_array_add (td->relocs, reloc);
@@ -6949,25 +7441,42 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
                        *ip++ = MINT_NIY;
 #endif
        } else {
-               if (MINT_IS_LDLOC (opcode) || MINT_IS_STLOC (opcode) || MINT_IS_STLOC_NP (opcode) || opcode == MINT_LDLOCA_S ||
-                               MINT_IS_LDLOCFLD (opcode) || MINT_IS_LOCUNOP (opcode) || MINT_IS_STLOCFLD (opcode)) {
-                       ins->data [0] = get_interp_local_offset (td, ins->data [0]);
-               } else if (MINT_IS_MOVLOC (opcode)) {
-                       ins->data [0] = get_interp_local_offset (td, ins->data [0]);
-                       ins->data [1] = get_interp_local_offset (td, ins->data [1]);
-               } else if (opcode == MINT_INIT_ARGLIST) {
-                       ins->data [0] = get_interp_local_offset (td, ins->data [0]);
+               if (mono_interp_op_dregs [opcode])
+                       *ip++ = get_interp_local_offset (td, ins->dreg, TRUE);
+
+               if (mono_interp_op_sregs [opcode]) {
+                       for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                               *ip++ = get_interp_local_offset (td, ins->sregs [i], TRUE);
+               } else if (opcode == MINT_LDLOCA_S) {
+                       // This opcode receives a local but it is not viewed as a sreg since we don't load the value
+                       *ip++ = get_interp_local_offset (td, ins->sregs [0], TRUE);
                }
 
-               int size = get_inst_length (ins) - 1;
+               int left = get_inst_length (ins) - (ip - start_ip);
                // Emit the rest of the data
-               for (int i = 0; i < size; i++)
+               for (int i = 0; i < left; i++)
                        *ip++ = ins->data [i];
        }
        mono_interp_stats.emitted_instructions++;
        return ip;
 }
 
+static void
+alloc_ins_locals (TransformData *td, InterpInst *ins)
+{
+       int opcode = ins->opcode;
+       if (mono_interp_op_sregs [opcode]) {
+               for (int i = 0; i < mono_interp_op_sregs [opcode]; i++)
+                       get_interp_local_offset (td, ins->sregs [i], FALSE);
+       } else if (opcode == MINT_LDLOCA_S) {
+               // This opcode receives a local but it is not viewed as a sreg since we don't load the value
+               get_interp_local_offset (td, ins->sregs [0], FALSE);
+       }
+
+       if (mono_interp_op_dregs [opcode])
+               get_interp_local_offset (td, ins->dreg, FALSE);
+}
+
 // Generates the final code, after we are done with all the passes
 static void
 generate_compacted_code (TransformData *td)
@@ -6977,11 +7486,12 @@ generate_compacted_code (TransformData *td)
        td->relocs = g_ptr_array_new ();
        InterpBasicBlock *bb;
 
-       // Iterate once to compute the exact size of the compacted code
+       // Iterate once for preliminary computations
        for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
                InterpInst *ins = bb->first_ins;
                while (ins) {
                        size += get_inst_length (ins);
+                       alloc_ins_locals (td, ins);
                        ins = ins->next;
                }
        }
@@ -7007,58 +7517,6 @@ generate_compacted_code (TransformData *td)
        g_ptr_array_free (td->relocs, TRUE);
 }
 
-static int
-get_movloc_for_type (int mt)
-{
-       switch (mt) {
-       case MINT_TYPE_I1:
-       case MINT_TYPE_U1:
-               return MINT_MOVLOC_1;
-       case MINT_TYPE_I2:
-       case MINT_TYPE_U2:
-               return MINT_MOVLOC_2;
-       case MINT_TYPE_I4:
-       case MINT_TYPE_R4:
-               return MINT_MOVLOC_4;
-       case MINT_TYPE_I8:
-       case MINT_TYPE_R8:
-               return MINT_MOVLOC_8;
-       case MINT_TYPE_O:
-#if SIZEOF_VOID_P == 8
-               return MINT_MOVLOC_8;
-#else
-               return MINT_MOVLOC_4;
-#endif
-       case MINT_TYPE_VT:
-               return MINT_MOVLOC_VT;
-       }
-       g_assert_not_reached ();
-}
-
-// The value of local has changed. This means the contents of the stack where the
-// local was loaded, no longer contain the value of the local. Clear them.
-static void
-clear_stack_content_info_for_local (StackContentInfo *start, StackContentInfo *end, int local)
-{
-       StackContentInfo *si;
-       for (si = start; si < end; si++) {
-               if (si->val.type == STACK_VALUE_LOCAL && si->val.local == local)
-                       si->val.type = STACK_VALUE_NONE;
-       }
-}
-
-// The value of local has changed. This means we can no longer assume that any other local
-// is a copy of this local.
-static void
-clear_local_content_info_for_local (StackValue *start, StackValue *end, int local)
-{
-       StackValue *sval;
-       for (sval = start; sval < end; sval++) {
-               if (sval->type == STACK_VALUE_LOCAL && sval->local == local)
-                       sval->type = STACK_VALUE_NONE;
-       }
-}
-
 // Traverse the list of basic blocks and merge adjacent blocks
 static gboolean
 interp_optimize_bblocks (TransformData *td)
@@ -7100,13 +7558,10 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
                g_assert (td->locals [i].indirects >= 0);
                if (!local_ref_count [i] &&
                                !td->locals [i].indirects &&
+                               !(td->locals [i].flags & INTERP_LOCAL_FLAG_CALL_ARGS) &&
                                (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD) == 0) {
                        needs_dce = TRUE;
-                       // If we do another deadce iteration over the code, make sure we don't try
-                       // to kill instructions accessing locals that have already been handled in
-                       // a previous iteration.
                        td->locals [i].flags |= INTERP_LOCAL_FLAG_DEAD;
-                       break;
                }
        }
 
@@ -7117,24 +7572,29 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
        // Kill instructions that don't use stack and are storing into dead locals
        for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
                for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
-                       if (MINT_IS_STLOC_NP (ins->opcode)) {
-                               if (!local_ref_count [ins->data [0]] && !td->locals [ins->data [0]].indirects) {
-                                       interp_clear_ins (ins);
-                                       mono_interp_stats.killed_instructions++;
-                                       // We killed an instruction that makes use of the stack. This might uncover new optimizations
-                                       needs_cprop = TRUE;
-                               }
-                       } else if (MINT_IS_MOVLOC (ins->opcode)) {
-                               if (!local_ref_count [ins->data [1]] && !td->locals [ins->data [1]].indirects) {
+                       if (MINT_IS_MOV (ins->opcode) ||
+                                       MINT_IS_LDC_I4 (ins->opcode) ||
+                                       ins->opcode == MINT_LDC_I8 ||
+                                       ins->opcode == MINT_MONO_LDPTR ||
+                                       ins->opcode == MINT_LDLOCA_S) {
+                               int dreg = ins->dreg;
+                               if (td->locals [dreg].flags & INTERP_LOCAL_FLAG_DEAD) {
+                                       if (td->verbose_level) {
+                                               g_print ("kill dead ins:\n\t");
+                                               dump_interp_inst (ins);
+                                       }
+
+                                       if (ins->opcode == MINT_LDLOCA_S) {
+                                               mono_interp_stats.ldlocas_removed++;
+                                               td->locals [ins->sregs [0]].indirects--;
+                                               if (!td->locals [ins->sregs [0]].indirects) {
+                                                       // We can do cprop now through this local. Run cprop again.
+                                                       needs_cprop = TRUE;
+                                               }
+                                       }
                                        interp_clear_ins (ins);
                                        mono_interp_stats.killed_instructions++;
-                               }
-                       } else if (MINT_IS_STLOC (ins->opcode) && ins->opcode != MINT_STLOC_VT) {
-                               if (!local_ref_count [ins->data [0]] && !td->locals [ins->data [0]].indirects) {
-                                       // We store to a dead stloc, we can replace it with a POP to save local space
-                                       ins->opcode = MINT_POP;
-                                       mono_interp_stats.added_pop_count++;
-                                       // We might to be able to kill both the pop and the instruction pushing the value
+                                       // FIXME This is lazy. We should update the ref count for the sregs and redo deadce.
                                        needs_cprop = TRUE;
                                }
                        }
@@ -7143,144 +7603,134 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
        return needs_cprop;
 }
 
-#define INTERP_FOLD_UNOP(opcode,stack_type,field,op) \
+#define INTERP_FOLD_UNOP(opcode,val_type,field,op) /* switch case folding a unary op; uses `val` and `result` from the expanding function */ \
        case opcode: \
-               g_assert (sp->val.type == stack_type); \
-               result.type = stack_type; \
-               result.field = op sp->val.field; \
+               g_assert (val->type == val_type); /* the known constant must have the operand type named for this opcode */ \
+               result.type = val_type; /* a unary op keeps the operand's type */ \
+               result.field = op val->field; /* `op` is pasted as a prefix, e.g. `-`, `~`, `1+` or `0 ==` */ \
                break;
 
-#define INTERP_FOLD_CONV(opcode,stack_type_dst,field_dst,stack_type_src,field_src,cast_type) \
+#define INTERP_FOLD_CONV(opcode,val_type_dst,field_dst,val_type_src,field_src,cast_type) /* switch case folding an unconditional conversion; uses `val` and `result` from the expanding function */ \
        case opcode: \
-               g_assert (sp->val.type == stack_type_src); \
-               result.type = stack_type_dst; \
-               result.field_dst = (cast_type)sp->val.field_src; \
+               g_assert (val->type == val_type_src); /* field_src must be the field that matches val_type_src */ \
+               result.type = val_type_dst; \
+               result.field_dst = (cast_type)val->field_src; /* read the source field, cast it, store into the destination field */ \
                break;
 
-#define INTERP_FOLD_CONV_FULL(opcode,stack_type_dst,field_dst,stack_type_src,field_src,cast_type,cond) \
+#define INTERP_FOLD_CONV_FULL(opcode,val_type_dst,field_dst,val_type_src,field_src,cast_type,cond) /* same shape as INTERP_FOLD_CONV, but folds only when `cond` holds */ \
        case opcode: \
-               g_assert (sp->val.type == stack_type_src); \
-               if (!(cond)) goto cfold_failed; \
-               result.type = stack_type_dst; \
-               result.field_dst = (cast_type)sp->val.field_src; \
+               g_assert (val->type == val_type_src); \
+               if (!(cond)) return ins; /* leaves the expanding function, handing back `ins` unmodified */ \
+               result.type = val_type_dst; \
+               result.field_dst = (cast_type)val->field_src; \
                break;
 
 static InterpInst*
-interp_fold_unop (TransformData *td, StackContentInfo *sp, InterpInst *ins)
+interp_fold_unop (TransformData *td, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
 {
-       StackValue result;
-
-       if (ins->opcode >= MINT_CONV_I4_I8_SP &&
-                       ins->opcode <= MINT_CONV_R8_R4_SP) {
-               // Decrement sp so it's easier to access top of the stack
-               sp -= 2;
-               if (sp->val.type != STACK_VALUE_I4 && sp->val.type != STACK_VALUE_I8)
-                       goto cfold_failed;
-
-               switch (ins->opcode) {
-                       INTERP_FOLD_CONV (MINT_CONV_I4_I8_SP, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint32);
-                       INTERP_FOLD_CONV (MINT_CONV_I8_I4_SP, STACK_VALUE_I8, l, STACK_VALUE_I4, i, gint64);
-                       default:
-                               goto cfold_failed;
-               }
-       } else {
-               // Decrement sp so it's easier to access top of the stack
-               sp--;
-               if (sp->val.type != STACK_VALUE_I4 && sp->val.type != STACK_VALUE_I8)
-                       goto cfold_failed;
-
-               // Top of the stack is a constant
-               switch (ins->opcode) {
-                       INTERP_FOLD_UNOP (MINT_ADD1_I4, STACK_VALUE_I4, i, 1+);
-                       INTERP_FOLD_UNOP (MINT_ADD1_I8, STACK_VALUE_I8, l, 1+);
-                       INTERP_FOLD_UNOP (MINT_SUB1_I4, STACK_VALUE_I4, i, -1+);
-                       INTERP_FOLD_UNOP (MINT_SUB1_I8, STACK_VALUE_I8, l, -1+);
-                       INTERP_FOLD_UNOP (MINT_NEG_I4, STACK_VALUE_I4, i, -);
-                       INTERP_FOLD_UNOP (MINT_NEG_I8, STACK_VALUE_I8, l, -);
-                       INTERP_FOLD_UNOP (MINT_NOT_I4, STACK_VALUE_I4, i, ~);
-                       INTERP_FOLD_UNOP (MINT_NOT_I8, STACK_VALUE_I8, l, ~);
-                       INTERP_FOLD_UNOP (MINT_CEQ0_I4, STACK_VALUE_I4, i, 0 ==);
-
-                       INTERP_FOLD_CONV (MINT_CONV_I1_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint8);
-                       INTERP_FOLD_CONV (MINT_CONV_I1_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint8);
-                       INTERP_FOLD_CONV (MINT_CONV_U1_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, guint8);
-                       INTERP_FOLD_CONV (MINT_CONV_U1_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, guint8);
-
-                       INTERP_FOLD_CONV (MINT_CONV_I2_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint16);
-                       INTERP_FOLD_CONV (MINT_CONV_I2_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint16);
-                       INTERP_FOLD_CONV (MINT_CONV_U2_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, guint16);
-                       INTERP_FOLD_CONV (MINT_CONV_U2_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, guint16);
-
-                       INTERP_FOLD_CONV (MINT_CONV_I4_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint32);
-                       INTERP_FOLD_CONV (MINT_CONV_U4_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint32);
-
-                       INTERP_FOLD_CONV (MINT_CONV_I8_I4, STACK_VALUE_I8, l, STACK_VALUE_I4, i, gint32);
-                       INTERP_FOLD_CONV (MINT_CONV_I8_U4, STACK_VALUE_I8, l, STACK_VALUE_I4, i, guint32);
-
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint8, sp [0].val.i >= G_MININT8 && sp [0].val.i <= G_MAXINT8);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint8, sp [0].val.l >= G_MININT8 && sp [0].val.l <= G_MAXINT8);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_U4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint8, sp [0].val.i >= 0 && sp [0].val.i <= G_MAXINT8);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_U8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint8, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXINT8);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U1_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, guint8, sp [0].val.i >= 0 && sp [0].val.i <= G_MAXUINT8);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U1_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, guint8, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXUINT8);
-
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint16, sp [0].val.i >= G_MININT16 && sp [0].val.i <= G_MAXINT16);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, i, gint16, sp [0].val.l >= G_MININT16 && sp [0].val.l <= G_MAXINT16);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_U4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint16, sp [0].val.i >= 0 && sp [0].val.i <= G_MAXINT16);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_U8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint16, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXINT16);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U2_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, guint16, sp [0].val.i >= 0 && sp [0].val.i <= G_MAXUINT16);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U2_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, guint16, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXUINT16);
-
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_U4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, gint32, sp [0].val.i >= 0);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint32, sp [0].val.l >= G_MININT32 && sp [0].val.l <= G_MAXINT32);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_U8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, gint32, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXINT32);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U4_I4, STACK_VALUE_I4, i, STACK_VALUE_I4, i, guint32, sp [0].val.i >= 0);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U4_I8, STACK_VALUE_I4, i, STACK_VALUE_I8, l, guint32, sp [0].val.l >= 0 && sp [0].val.l <= G_MAXINT32);
-
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I8_U8, STACK_VALUE_I8, l, STACK_VALUE_I8, l, gint64, sp [0].val.l >= 0);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U8_I4, STACK_VALUE_I8, l, STACK_VALUE_I4, i, guint64, sp [0].val.i >= 0);
-                       INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U8_I8, STACK_VALUE_I8, l, STACK_VALUE_I8, l, guint64, sp [0].val.l >= 0);
+       // ins should be an unop, therefore it should have a single dreg and a single sreg
+       int dreg = ins->dreg;
+       int sreg = ins->sregs [0];
+       LocalValue *val = &local_defs [sreg];
+       LocalValue result;
 
-                       default:
-                               goto cfold_failed;
-               }
+       if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8)
+               return ins;
+
+       // Top of the stack is a constant
+       switch (ins->opcode) {
+               INTERP_FOLD_UNOP (MINT_ADD1_I4, LOCAL_VALUE_I4, i, 1+);
+               INTERP_FOLD_UNOP (MINT_ADD1_I8, LOCAL_VALUE_I8, l, 1+);
+               INTERP_FOLD_UNOP (MINT_SUB1_I4, LOCAL_VALUE_I4, i, -1+);
+               INTERP_FOLD_UNOP (MINT_SUB1_I8, LOCAL_VALUE_I8, l, -1+);
+               INTERP_FOLD_UNOP (MINT_NEG_I4, LOCAL_VALUE_I4, i, -);
+               INTERP_FOLD_UNOP (MINT_NEG_I8, LOCAL_VALUE_I8, l, -);
+               INTERP_FOLD_UNOP (MINT_NOT_I4, LOCAL_VALUE_I4, i, ~);
+               INTERP_FOLD_UNOP (MINT_NOT_I8, LOCAL_VALUE_I8, l, ~);
+               INTERP_FOLD_UNOP (MINT_CEQ0_I4, LOCAL_VALUE_I4, i, 0 ==);
+
+               // MOV's are just a copy, if the contents of sreg are known
+               INTERP_FOLD_CONV (MINT_MOV_I1, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint32);
+               INTERP_FOLD_CONV (MINT_MOV_U1, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint32);
+               INTERP_FOLD_CONV (MINT_MOV_I2, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint32);
+               INTERP_FOLD_CONV (MINT_MOV_U2, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint32);
+
+               INTERP_FOLD_CONV (MINT_CONV_I1_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint8);
+               INTERP_FOLD_CONV (MINT_CONV_I1_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint8);
+               INTERP_FOLD_CONV (MINT_CONV_U1_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, guint8);
+               INTERP_FOLD_CONV (MINT_CONV_U1_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, guint8);
+
+               INTERP_FOLD_CONV (MINT_CONV_I2_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint16);
+               INTERP_FOLD_CONV (MINT_CONV_I2_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint16);
+               INTERP_FOLD_CONV (MINT_CONV_U2_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, guint16);
+               INTERP_FOLD_CONV (MINT_CONV_U2_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, guint16);
+
+               INTERP_FOLD_CONV (MINT_CONV_I4_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint32);
+               INTERP_FOLD_CONV (MINT_CONV_U4_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint32);
+
+               INTERP_FOLD_CONV (MINT_CONV_I8_I4, LOCAL_VALUE_I8, l, LOCAL_VALUE_I4, i, gint32);
+               INTERP_FOLD_CONV (MINT_CONV_I8_U4, LOCAL_VALUE_I8, l, LOCAL_VALUE_I4, i, guint32);
+
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint8, val->i >= G_MININT8 && val->i <= G_MAXINT8);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint8, val->l >= G_MININT8 && val->l <= G_MAXINT8);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_U4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint8, val->i >= 0 && val->i <= G_MAXINT8);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I1_U8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint8, val->l >= 0 && val->l <= G_MAXINT8);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U1_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, guint8, val->i >= 0 && val->i <= G_MAXUINT8);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U1_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, guint8, val->l >= 0 && val->l <= G_MAXUINT8);
+
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint16, val->i >= G_MININT16 && val->i <= G_MAXINT16);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, i, gint16, val->l >= G_MININT16 && val->l <= G_MAXINT16);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_U4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint16, val->i >= 0 && val->i <= G_MAXINT16);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I2_U8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint16, val->l >= 0 && val->l <= G_MAXINT16);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U2_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, guint16, val->i >= 0 && val->i <= G_MAXUINT16);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U2_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, guint16, val->l >= 0 && val->l <= G_MAXUINT16);
+
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_U4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, gint32, val->i >= 0);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint32, val->l >= G_MININT32 && val->l <= G_MAXINT32);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I4_U8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, gint32, val->l >= 0 && val->l <= G_MAXINT32);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U4_I4, LOCAL_VALUE_I4, i, LOCAL_VALUE_I4, i, guint32, val->i >= 0);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U4_I8, LOCAL_VALUE_I4, i, LOCAL_VALUE_I8, l, guint32, val->l >= 0 && val->l <= G_MAXINT32);
+
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_I8_U8, LOCAL_VALUE_I8, l, LOCAL_VALUE_I8, l, gint64, val->l >= 0);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U8_I4, LOCAL_VALUE_I8, l, LOCAL_VALUE_I4, i, guint64, val->i >= 0);
+               INTERP_FOLD_CONV_FULL (MINT_CONV_OVF_U8_I8, LOCAL_VALUE_I8, l, LOCAL_VALUE_I8, l, guint64, val->l >= 0);
+
+               default:
+                       return ins;
        }
 
-       // We were able to compute the result of the ins instruction. We store the
-       // current value for the top of the stack and, if possible, try to replace the
-       // instructions that are part of this unary operation with a single LDC.
+       // We were able to compute the result of the ins instruction. We replace the unop
+       // with a LDC of the constant. We leave alone the sregs of this instruction, for
+       // deadce to kill the instructions initializing them.
        mono_interp_stats.constant_folds++;
-       if (sp->ins != NULL) {
-               // The instruction that pushed the top of stack can be replaced with the new constant result
-               if (result.type == STACK_VALUE_I4)
-                       sp->ins = interp_get_ldc_i4_from_const (td, sp->ins, result.i);
-               else if (result.type == STACK_VALUE_I8)
-                       sp->ins = interp_inst_replace_with_i8_const (td, sp->ins, result.l);
-               else
-                       g_assert_not_reached ();
-               if (td->verbose_level) {
-                       g_print ("Fold unop :\n\t");
-                       dump_interp_inst (sp->ins);
-               }
-               mono_interp_stats.killed_instructions++;
-               interp_clear_ins (ins);
+
+       if (result.type == LOCAL_VALUE_I4)
+               ins = interp_get_ldc_i4_from_const (td, ins, result.i, dreg);
+       else if (result.type == LOCAL_VALUE_I8)
+               ins = interp_inst_replace_with_i8_const (td, ins, result.l);
+       else
+               g_assert_not_reached ();
+
+       if (td->verbose_level) {
+               g_print ("Fold unop :\n\t");
+               dump_interp_inst (ins);
        }
-       sp->val = result;
-       return ins;
 
-cfold_failed:
-       sp->ins = NULL;
-       sp->val.type = STACK_VALUE_NONE;
+       local_ref_count [sreg]--;
+       local_defs [dreg] = result;
+
        return ins;
 }
 
-#define INTERP_FOLD_UNOP_BR(_opcode,_stack_type,_cond) \
+#define INTERP_FOLD_UNOP_BR(_opcode,_local_type,_cond) \
        case _opcode: \
-               g_assert (sp->val.type == _stack_type); \
+               g_assert (val->type == _local_type); \
                if (_cond) { \
                        ins->opcode = MINT_BR_S; \
                        if (cbb->next_bb != ins->info.target_bb) \
                                interp_unlink_bblocks (cbb, cbb->next_bb); \
+                       for (InterpInst *it = ins->next; it != NULL; it = it->next) \
+                               interp_clear_ins (it); \
                } else { \
                        interp_clear_ins (ins); \
                        interp_unlink_bblocks (cbb, ins->info.target_bb); \
@@ -7288,174 +7738,181 @@ cfold_failed:
                break;
 
 static InterpInst*
-interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, StackContentInfo *sp, InterpInst *ins)
+interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
 {
-       sp--;
-       // If we can't remove the instruction pushing the constant, don't bother
-       if (sp->ins == NULL)
-               return ins;
-       if (sp->val.type != STACK_VALUE_I4 && sp->val.type != STACK_VALUE_I8)
+       // Resolves a brtrue/brfalse on a known I4/I8 constant at transform time. ins should be an unop conditional branch, therefore it should have a single sreg. Always returns `ins`.
+       int sreg = ins->sregs [0];
+       LocalValue *val = &local_defs [sreg]; // what is currently known about the tested local
+
+       if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8)
                return ins;
+
        // Top of the stack is a constant
        switch (ins->opcode) {
-               INTERP_FOLD_UNOP_BR (MINT_BRFALSE_I4_S, STACK_VALUE_I4, sp [0].val.i == 0);
-               INTERP_FOLD_UNOP_BR (MINT_BRFALSE_I8_S, STACK_VALUE_I8, sp [0].val.l == 0);
-               INTERP_FOLD_UNOP_BR (MINT_BRTRUE_I4_S, STACK_VALUE_I4, sp [0].val.i != 0);
-               INTERP_FOLD_UNOP_BR (MINT_BRTRUE_I8_S, STACK_VALUE_I8, sp [0].val.l != 0);
+               INTERP_FOLD_UNOP_BR (MINT_BRFALSE_I4_S, LOCAL_VALUE_I4, val->i == 0); // condition true: ins becomes MINT_BR_S; false: ins is cleared
+               INTERP_FOLD_UNOP_BR (MINT_BRFALSE_I8_S, LOCAL_VALUE_I8, val->l == 0);
+               INTERP_FOLD_UNOP_BR (MINT_BRTRUE_I4_S, LOCAL_VALUE_I4, val->i != 0);
+               INTERP_FOLD_UNOP_BR (MINT_BRTRUE_I8_S, LOCAL_VALUE_I8, val->l != 0);
 
                default:
                        return ins;
        }
 
+       if (td->verbose_level) {
+               g_print ("Fold unop cond br :\n\t");
+               dump_interp_inst (ins); // dumps ins as rewritten or cleared by the macro above
+       }
+
        mono_interp_stats.constant_folds++;
-       mono_interp_stats.killed_instructions++;
-       interp_clear_ins (sp->ins);
-       sp->val.type = STACK_VALUE_NONE;
+       local_ref_count [sreg]--; // the branch no longer reads sreg
        return ins;
 }
 
-#define INTERP_FOLD_BINOP(opcode,stack_type,field,op) \
+#define INTERP_FOLD_BINOP(opcode,local_type,field,op) /* switch case folding `val1 op val2`; uses `val1`, `val2` and `result` from the expanding function */ \
        case opcode: \
-               g_assert (sp [0].val.type == stack_type && sp [1].val.type == stack_type); \
-               result.type = stack_type; \
-               result.field = sp [0].val.field op sp [1].val.field; \
+               g_assert (val1->type == local_type && val2->type == local_type); /* both operands must be constants of the same type */ \
+               result.type = local_type; \
+               result.field = val1->field op val2->field; \
                break;
 
-#define INTERP_FOLD_BINOP_FULL(opcode,stack_type,field,op,cast_type,cond) \
+#define INTERP_FOLD_BINOP_FULL(opcode,local_type,field,op,cast_type,cond) /* switch case folding `val1 op val2` only when `cond` holds; both operands are cast to cast_type first */ \
        case opcode: \
-               g_assert (sp [0].val.type == stack_type && sp [1].val.type == stack_type); \
-               if (!(cond)) goto cfold_failed; \
-               result.type = stack_type; \
-               result.field = (cast_type)sp [0].val.field op (cast_type)sp [1].val.field; \
+               g_assert (val1->type == local_type && val2->type == local_type); \
+               if (!(cond)) return ins; /* leaves the expanding function, handing back `ins` unmodified */ \
+               result.type = local_type; \
+               result.field = (cast_type)val1->field op (cast_type)val2->field; /* cast_type selects signed vs unsigned arithmetic */ \
                break;
 
-#define INTERP_FOLD_SHIFTOP(opcode,stack_type,field,shift_op,cast_type) \
+#define INTERP_FOLD_SHIFTOP(opcode,local_type,field,shift_op,cast_type) /* switch case folding a shift of val1 by the I4 amount in val2 */ \
        case opcode: \
-               g_assert (sp [1].val.type == STACK_VALUE_I4); \
-               result.type = stack_type; \
-               result.field = (cast_type)sp [0].val.field shift_op sp [1].val.i; \
+               g_assert (val2->type == LOCAL_VALUE_I4); /* only the shift amount's type is asserted; it is always read through ->i */ \
+               result.type = local_type; \
+               result.field = (cast_type)val1->field shift_op val2->i; /* NOTE(review): the shift amount is not range-checked here - confirm out-of-range or negative counts cannot reach this fold */ \
                break;
 
-#define INTERP_FOLD_RELOP(opcode,stack_type,field,relop,cast_type) \
+#define INTERP_FOLD_RELOP(opcode,local_type,field,relop,cast_type) /* switch case folding a comparison of two constants of local_type */ \
        case opcode: \
-               g_assert (sp [0].val.type == stack_type && sp [1].val.type == stack_type); \
-               result.type = STACK_VALUE_I4; \
-               result.i = (cast_type) sp [0].val.field relop (cast_type) sp [1].val.field; \
+               g_assert (val1->type == local_type && val2->type == local_type); \
+               result.type = LOCAL_VALUE_I4; /* the comparison result is always an I4, whatever the operand type */ \
+               result.i = (cast_type) val1->field relop (cast_type) val2->field; /* cast_type selects signed vs unsigned comparison */ \
                break;
 
 
 static InterpInst*
-interp_fold_binop (TransformData *td, StackContentInfo *sp, InterpInst *ins)
+interp_fold_binop (TransformData *td, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
 {
-       StackValue result;
-       // Decrement sp so it's easier to access top of the stack
-       sp -= 2;
-       if (sp [0].val.type != STACK_VALUE_I4 && sp [0].val.type != STACK_VALUE_I8)
-               goto cfold_failed;
-       if (sp [1].val.type != STACK_VALUE_I4 && sp [1].val.type != STACK_VALUE_I8)
-               goto cfold_failed;
+       // Constant-folds `ins` when local_defs records known I4/I8 constants for both sources. ins should be a binop, therefore it should have a single dreg and two sregs. Returns the replacement LDC on success, or `ins` untouched when nothing could be folded.
+       int dreg = ins->dreg;
+       int sreg1 = ins->sregs [0];
+       int sreg2 = ins->sregs [1];
+       LocalValue *val1 = &local_defs [sreg1]; // what is currently known about the first operand
+       LocalValue *val2 = &local_defs [sreg2]; // what is currently known about the second operand
+       LocalValue result; // filled in by whichever case below matches the opcode
+
+       if (val1->type != LOCAL_VALUE_I4 && val1->type != LOCAL_VALUE_I8)
+               return ins;
+       if (val2->type != LOCAL_VALUE_I4 && val2->type != LOCAL_VALUE_I8)
+               return ins;
 
        // Top two values of the stack are constants
        switch (ins->opcode) {
-               INTERP_FOLD_BINOP (MINT_ADD_I4, STACK_VALUE_I4, i, +);
-               INTERP_FOLD_BINOP (MINT_ADD_I8, STACK_VALUE_I8, l, +);
-               INTERP_FOLD_BINOP (MINT_SUB_I4, STACK_VALUE_I4, i, -);
-               INTERP_FOLD_BINOP (MINT_SUB_I8, STACK_VALUE_I8, l, -);
-               INTERP_FOLD_BINOP (MINT_MUL_I4, STACK_VALUE_I4, i, *);
-               INTERP_FOLD_BINOP (MINT_MUL_I8, STACK_VALUE_I8, l, *);
-
-               INTERP_FOLD_BINOP (MINT_AND_I4, STACK_VALUE_I4, i, &);
-               INTERP_FOLD_BINOP (MINT_AND_I8, STACK_VALUE_I8, l, &);
-               INTERP_FOLD_BINOP (MINT_OR_I4, STACK_VALUE_I4, i, |);
-               INTERP_FOLD_BINOP (MINT_OR_I8, STACK_VALUE_I8, l, |);
-               INTERP_FOLD_BINOP (MINT_XOR_I4, STACK_VALUE_I4, i, ^);
-               INTERP_FOLD_BINOP (MINT_XOR_I8, STACK_VALUE_I8, l, ^);
-
-               INTERP_FOLD_SHIFTOP (MINT_SHL_I4, STACK_VALUE_I4, i, <<, gint32);
-               INTERP_FOLD_SHIFTOP (MINT_SHL_I8, STACK_VALUE_I8, l, <<, gint64);
-               INTERP_FOLD_SHIFTOP (MINT_SHR_I4, STACK_VALUE_I4, i, >>, gint32);
-               INTERP_FOLD_SHIFTOP (MINT_SHR_I8, STACK_VALUE_I8, l, >>, gint64);
-               INTERP_FOLD_SHIFTOP (MINT_SHR_UN_I4, STACK_VALUE_I4, i, >>, guint32);
-               INTERP_FOLD_SHIFTOP (MINT_SHR_UN_I8, STACK_VALUE_I8, l, >>, guint64);
-
-               INTERP_FOLD_RELOP (MINT_CEQ_I4, STACK_VALUE_I4, i, ==, gint32);
-               INTERP_FOLD_RELOP (MINT_CEQ_I8, STACK_VALUE_I8, l, ==, gint64);
-               INTERP_FOLD_RELOP (MINT_CNE_I4, STACK_VALUE_I4, i, !=, gint32);
-               INTERP_FOLD_RELOP (MINT_CNE_I8, STACK_VALUE_I8, l, !=, gint64);
-
-               INTERP_FOLD_RELOP (MINT_CGT_I4, STACK_VALUE_I4, i, >, gint32);
-               INTERP_FOLD_RELOP (MINT_CGT_I8, STACK_VALUE_I8, l, >, gint64);
-               INTERP_FOLD_RELOP (MINT_CGT_UN_I4, STACK_VALUE_I4, i, >, guint32);
-               INTERP_FOLD_RELOP (MINT_CGT_UN_I8, STACK_VALUE_I8, l, >, guint64);
-
-               INTERP_FOLD_RELOP (MINT_CGE_I4, STACK_VALUE_I4, i, >=, gint32);
-               INTERP_FOLD_RELOP (MINT_CGE_I8, STACK_VALUE_I8, l, >=, gint64);
-               INTERP_FOLD_RELOP (MINT_CGE_UN_I4, STACK_VALUE_I4, i, >=, guint32);
-               INTERP_FOLD_RELOP (MINT_CGE_UN_I8, STACK_VALUE_I8, l, >=, guint64);
-
-               INTERP_FOLD_RELOP (MINT_CLT_I4, STACK_VALUE_I4, i, <, gint32);
-               INTERP_FOLD_RELOP (MINT_CLT_I8, STACK_VALUE_I8, l, <, gint64);
-               INTERP_FOLD_RELOP (MINT_CLT_UN_I4, STACK_VALUE_I4, i, <, guint32);
-               INTERP_FOLD_RELOP (MINT_CLT_UN_I8, STACK_VALUE_I8, l, <, guint64);
-
-               INTERP_FOLD_RELOP (MINT_CLE_I4, STACK_VALUE_I4, i, <=, gint32);
-               INTERP_FOLD_RELOP (MINT_CLE_I8, STACK_VALUE_I8, l, <=, gint64);
-               INTERP_FOLD_RELOP (MINT_CLE_UN_I4, STACK_VALUE_I4, i, <=, guint32);
-               INTERP_FOLD_RELOP (MINT_CLE_UN_I8, STACK_VALUE_I8, l, <=, guint64);
-
-               INTERP_FOLD_BINOP_FULL (MINT_DIV_I4, STACK_VALUE_I4, i, /, gint32, sp [1].val.i != 0 && (sp [0].val.i != G_MININT32 || sp [1].val.i != -1));
-               INTERP_FOLD_BINOP_FULL (MINT_DIV_I8, STACK_VALUE_I8, l, /, gint64, sp [1].val.l != 0 && (sp [0].val.l != G_MININT64 || sp [1].val.l != -1));
-               INTERP_FOLD_BINOP_FULL (MINT_DIV_UN_I4, STACK_VALUE_I4, i, /, guint32, sp [1].val.i != 0);
-               INTERP_FOLD_BINOP_FULL (MINT_DIV_UN_I8, STACK_VALUE_I8, l, /, guint64, sp [1].val.l != 0);
-
-               INTERP_FOLD_BINOP_FULL (MINT_REM_I4, STACK_VALUE_I4, i, %, gint32, sp [1].val.i != 0 && (sp [0].val.i != G_MININT32 || sp [1].val.i != -1));
-               INTERP_FOLD_BINOP_FULL (MINT_REM_I8, STACK_VALUE_I8, l, %, gint64, sp [1].val.l != 0 && (sp [0].val.l != G_MININT64 || sp [1].val.l != -1));
-               INTERP_FOLD_BINOP_FULL (MINT_REM_UN_I4, STACK_VALUE_I4, i, %, guint32, sp [1].val.i != 0);
-               INTERP_FOLD_BINOP_FULL (MINT_REM_UN_I8, STACK_VALUE_I8, l, %, guint64, sp [1].val.l != 0);
+               INTERP_FOLD_BINOP (MINT_ADD_I4, LOCAL_VALUE_I4, i, +);
+               INTERP_FOLD_BINOP (MINT_ADD_I8, LOCAL_VALUE_I8, l, +);
+               INTERP_FOLD_BINOP (MINT_SUB_I4, LOCAL_VALUE_I4, i, -);
+               INTERP_FOLD_BINOP (MINT_SUB_I8, LOCAL_VALUE_I8, l, -);
+               INTERP_FOLD_BINOP (MINT_MUL_I4, LOCAL_VALUE_I4, i, *);
+               INTERP_FOLD_BINOP (MINT_MUL_I8, LOCAL_VALUE_I8, l, *);
+
+               INTERP_FOLD_BINOP (MINT_AND_I4, LOCAL_VALUE_I4, i, &);
+               INTERP_FOLD_BINOP (MINT_AND_I8, LOCAL_VALUE_I8, l, &);
+               INTERP_FOLD_BINOP (MINT_OR_I4, LOCAL_VALUE_I4, i, |);
+               INTERP_FOLD_BINOP (MINT_OR_I8, LOCAL_VALUE_I8, l, |);
+               INTERP_FOLD_BINOP (MINT_XOR_I4, LOCAL_VALUE_I4, i, ^);
+               INTERP_FOLD_BINOP (MINT_XOR_I8, LOCAL_VALUE_I8, l, ^);
+
+               INTERP_FOLD_SHIFTOP (MINT_SHL_I4, LOCAL_VALUE_I4, i, <<, gint32);
+               INTERP_FOLD_SHIFTOP (MINT_SHL_I8, LOCAL_VALUE_I8, l, <<, gint64);
+               INTERP_FOLD_SHIFTOP (MINT_SHR_I4, LOCAL_VALUE_I4, i, >>, gint32);
+               INTERP_FOLD_SHIFTOP (MINT_SHR_I8, LOCAL_VALUE_I8, l, >>, gint64);
+               INTERP_FOLD_SHIFTOP (MINT_SHR_UN_I4, LOCAL_VALUE_I4, i, >>, guint32); // the unsigned cast makes this shift operate on the unsigned value
+               INTERP_FOLD_SHIFTOP (MINT_SHR_UN_I8, LOCAL_VALUE_I8, l, >>, guint64);
+
+               INTERP_FOLD_RELOP (MINT_CEQ_I4, LOCAL_VALUE_I4, i, ==, gint32);
+               INTERP_FOLD_RELOP (MINT_CEQ_I8, LOCAL_VALUE_I8, l, ==, gint64);
+               INTERP_FOLD_RELOP (MINT_CNE_I4, LOCAL_VALUE_I4, i, !=, gint32);
+               INTERP_FOLD_RELOP (MINT_CNE_I8, LOCAL_VALUE_I8, l, !=, gint64);
+
+               INTERP_FOLD_RELOP (MINT_CGT_I4, LOCAL_VALUE_I4, i, >, gint32);
+               INTERP_FOLD_RELOP (MINT_CGT_I8, LOCAL_VALUE_I8, l, >, gint64);
+               INTERP_FOLD_RELOP (MINT_CGT_UN_I4, LOCAL_VALUE_I4, i, >, guint32);
+               INTERP_FOLD_RELOP (MINT_CGT_UN_I8, LOCAL_VALUE_I8, l, >, guint64);
+
+               INTERP_FOLD_RELOP (MINT_CGE_I4, LOCAL_VALUE_I4, i, >=, gint32);
+               INTERP_FOLD_RELOP (MINT_CGE_I8, LOCAL_VALUE_I8, l, >=, gint64);
+               INTERP_FOLD_RELOP (MINT_CGE_UN_I4, LOCAL_VALUE_I4, i, >=, guint32);
+               INTERP_FOLD_RELOP (MINT_CGE_UN_I8, LOCAL_VALUE_I8, l, >=, guint64);
+
+               INTERP_FOLD_RELOP (MINT_CLT_I4, LOCAL_VALUE_I4, i, <, gint32);
+               INTERP_FOLD_RELOP (MINT_CLT_I8, LOCAL_VALUE_I8, l, <, gint64);
+               INTERP_FOLD_RELOP (MINT_CLT_UN_I4, LOCAL_VALUE_I4, i, <, guint32);
+               INTERP_FOLD_RELOP (MINT_CLT_UN_I8, LOCAL_VALUE_I8, l, <, guint64);
+
+               INTERP_FOLD_RELOP (MINT_CLE_I4, LOCAL_VALUE_I4, i, <=, gint32);
+               INTERP_FOLD_RELOP (MINT_CLE_I8, LOCAL_VALUE_I8, l, <=, gint64);
+               INTERP_FOLD_RELOP (MINT_CLE_UN_I4, LOCAL_VALUE_I4, i, <=, guint32);
+               INTERP_FOLD_RELOP (MINT_CLE_UN_I8, LOCAL_VALUE_I8, l, <=, guint64);
+
+               INTERP_FOLD_BINOP_FULL (MINT_DIV_I4, LOCAL_VALUE_I4, i, /, gint32, val2->i != 0 && (val1->i != G_MININT32 || val2->i != -1)); // not folded for a zero divisor or for MININT / -1
+               INTERP_FOLD_BINOP_FULL (MINT_DIV_I8, LOCAL_VALUE_I8, l, /, gint64, val2->l != 0 && (val1->l != G_MININT64 || val2->l != -1));
+               INTERP_FOLD_BINOP_FULL (MINT_DIV_UN_I4, LOCAL_VALUE_I4, i, /, guint32, val2->i != 0);
+               INTERP_FOLD_BINOP_FULL (MINT_DIV_UN_I8, LOCAL_VALUE_I8, l, /, guint64, val2->l != 0);
+
+               INTERP_FOLD_BINOP_FULL (MINT_REM_I4, LOCAL_VALUE_I4, i, %, gint32, val2->i != 0 && (val1->i != G_MININT32 || val2->i != -1));
+               INTERP_FOLD_BINOP_FULL (MINT_REM_I8, LOCAL_VALUE_I8, l, %, gint64, val2->l != 0 && (val1->l != G_MININT64 || val2->l != -1));
+               INTERP_FOLD_BINOP_FULL (MINT_REM_UN_I4, LOCAL_VALUE_I4, i, %, guint32, val2->i != 0);
+               INTERP_FOLD_BINOP_FULL (MINT_REM_UN_I8, LOCAL_VALUE_I8, l, %, guint64, val2->l != 0);
 
                default:
-                       goto cfold_failed;
+                       return ins; // not an opcode this function knows how to fold
        }
 
-       // We were able to compute the result of the ins instruction. We store the
-       // current value for the top of the stack and, if possible, try to replace the
-       // instructions that are part of this unary operation with a single LDC.
+       // We were able to compute the result of the ins instruction. We replace the binop
+       // with a LDC of the constant. We leave alone the sregs of this instruction, for
+       // deadce to kill the instructions initializing them.
        mono_interp_stats.constant_folds++;
-       if (sp [0].ins != NULL && sp [1].ins != NULL) {
-               interp_clear_ins (sp [0].ins);
-               interp_clear_ins (sp [1].ins);
-               mono_interp_stats.killed_instructions += 2;
-               if (result.type == STACK_VALUE_I4)
-                       ins = interp_get_ldc_i4_from_const (td, ins, result.i);
-               else if (result.type == STACK_VALUE_I8)
-                       ins = interp_inst_replace_with_i8_const (td, ins, result.l);
-               else
-                       g_assert_not_reached ();
-               if (td->verbose_level) {
-                       g_print ("Fold binop :\n\t");
-                       dump_interp_inst (ins);
-               }
-               sp [0].ins = ins;
-       } else {
-               sp [0].ins = NULL;
+
+       if (result.type == LOCAL_VALUE_I4)
+               ins = interp_get_ldc_i4_from_const (td, ins, result.i, dreg);
+       else if (result.type == LOCAL_VALUE_I8)
+               ins = interp_inst_replace_with_i8_const (td, ins, result.l);
+       else
+               g_assert_not_reached (); // every case above sets result.type to I4 or I8
+
+       if (td->verbose_level) {
+               g_print ("Fold binop :\n\t");
+               dump_interp_inst (ins);
        }
-       sp [0].val = result;
-       return ins;
 
-cfold_failed:
-       sp->ins = NULL;
-       sp->val.type = STACK_VALUE_NONE;
+       local_ref_count [sreg1]--; // the folded instruction no longer reads either source
+       local_ref_count [sreg2]--;
+       local_defs [dreg] = result; // dreg is now known to hold the folded constant
        return ins;
 }
 
-#define INTERP_FOLD_BINOP_BR(_opcode,_stack_type,_cond) \
+// Due to poor current design, the branch op might not be the last instruction in the bblock
+// (in case we fallthrough and need to have the stack locals match the ones from next_bb, done
+// in fixup_newbb_stack_locals). If that's the case, clear all these mov's. This helps bblock
+// merging quickly find the MINT_BR_S opcode.
+#define INTERP_FOLD_BINOP_BR(_opcode,_local_type,_cond) \
        case _opcode: \
-               g_assert (sp [0].val.type == _stack_type); \
-               g_assert (sp [1].val.type == _stack_type); \
+               g_assert (val1->type == _local_type); \
+               g_assert (val2->type == _local_type); \
                if (_cond) { \
                        ins->opcode = MINT_BR_S; \
                        if (cbb->next_bb != ins->info.target_bb) \
                                interp_unlink_bblocks (cbb, cbb->next_bb); \
+                       for (InterpInst *it = ins->next; it != NULL; it = it->next) \
+                               interp_clear_ins (it); \
                } else { \
                        interp_clear_ins (ins); \
                        interp_unlink_bblocks (cbb, ins->info.target_bb); \
@@ -7463,87 +7920,66 @@ cfold_failed:
                break;
 
 static InterpInst*
-interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, StackContentInfo *sp, InterpInst *ins)
+interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
 {
-       sp -= 2;
-       // If we can't remove the instructions pushing the constants, don't bother
-       if (sp [0].ins == NULL || sp [1].ins == NULL)
-               return ins;
-       if (sp [0].val.type != STACK_VALUE_I4 && sp [0].val.type != STACK_VALUE_I8)
+       // ins should be a conditional binop, therefore it should have only two sregs
+       int sreg1 = ins->sregs [0];
+       int sreg2 = ins->sregs [1];
+       LocalValue *val1 = &local_defs [sreg1];
+       LocalValue *val2 = &local_defs [sreg2];
+
+       if (val1->type != LOCAL_VALUE_I4 && val1->type != LOCAL_VALUE_I8)
                return ins;
-       if (sp [1].val.type != STACK_VALUE_I4 && sp [1].val.type != STACK_VALUE_I8)
+       if (val2->type != LOCAL_VALUE_I4 && val2->type != LOCAL_VALUE_I8)
                return ins;
 
        switch (ins->opcode) {
-               INTERP_FOLD_BINOP_BR (MINT_BEQ_I4_S, STACK_VALUE_I4, sp [0].val.i == sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BEQ_I8_S, STACK_VALUE_I8, sp [0].val.l == sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BGE_I4_S, STACK_VALUE_I4, sp [0].val.i >= sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BGE_I8_S, STACK_VALUE_I8, sp [0].val.l >= sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BGT_I4_S, STACK_VALUE_I4, sp [0].val.i > sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BGT_I8_S, STACK_VALUE_I8, sp [0].val.l > sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BLT_I4_S, STACK_VALUE_I4, sp [0].val.i < sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BLT_I8_S, STACK_VALUE_I8, sp [0].val.l < sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BLE_I4_S, STACK_VALUE_I4, sp [0].val.i <= sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BLE_I8_S, STACK_VALUE_I8, sp [0].val.l <= sp [1].val.l);
-
-               INTERP_FOLD_BINOP_BR (MINT_BNE_UN_I4_S, STACK_VALUE_I4, sp [0].val.i != sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BNE_UN_I8_S, STACK_VALUE_I8, sp [0].val.l != sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BGE_UN_I4_S, STACK_VALUE_I4, (guint32)sp [0].val.i >= (guint32)sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BGE_UN_I8_S, STACK_VALUE_I8, (guint64)sp [0].val.l >= (guint64)sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BGT_UN_I4_S, STACK_VALUE_I4, (guint32)sp [0].val.i > (guint32)sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BGT_UN_I8_S, STACK_VALUE_I8, (guint64)sp [0].val.l > (guint64)sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BLE_UN_I4_S, STACK_VALUE_I4, (guint32)sp [0].val.i <= (guint32)sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BLE_UN_I8_S, STACK_VALUE_I8, (guint64)sp [0].val.l <= (guint64)sp [1].val.l);
-               INTERP_FOLD_BINOP_BR (MINT_BLT_UN_I4_S, STACK_VALUE_I4, (guint32)sp [0].val.i < (guint32)sp [1].val.i);
-               INTERP_FOLD_BINOP_BR (MINT_BLT_UN_I8_S, STACK_VALUE_I8, (guint64)sp [0].val.l < (guint64)sp [1].val.l);
+               INTERP_FOLD_BINOP_BR (MINT_BEQ_I4_S, LOCAL_VALUE_I4, val1->i == val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BEQ_I8_S, LOCAL_VALUE_I8, val1->l == val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BGE_I4_S, LOCAL_VALUE_I4, val1->i >= val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BGE_I8_S, LOCAL_VALUE_I8, val1->l >= val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BGT_I4_S, LOCAL_VALUE_I4, val1->i > val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BGT_I8_S, LOCAL_VALUE_I8, val1->l > val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BLT_I4_S, LOCAL_VALUE_I4, val1->i < val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BLT_I8_S, LOCAL_VALUE_I8, val1->l < val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BLE_I4_S, LOCAL_VALUE_I4, val1->i <= val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BLE_I8_S, LOCAL_VALUE_I8, val1->l <= val2->l);
+
+               INTERP_FOLD_BINOP_BR (MINT_BNE_UN_I4_S, LOCAL_VALUE_I4, val1->i != val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BNE_UN_I8_S, LOCAL_VALUE_I8, val1->l != val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BGE_UN_I4_S, LOCAL_VALUE_I4, (guint32)val1->i >= (guint32)val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BGE_UN_I8_S, LOCAL_VALUE_I8, (guint64)val1->l >= (guint64)val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BGT_UN_I4_S, LOCAL_VALUE_I4, (guint32)val1->i > (guint32)val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BGT_UN_I8_S, LOCAL_VALUE_I8, (guint64)val1->l > (guint64)val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BLE_UN_I4_S, LOCAL_VALUE_I4, (guint32)val1->i <= (guint32)val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BLE_UN_I8_S, LOCAL_VALUE_I8, (guint64)val1->l <= (guint64)val2->l);
+               INTERP_FOLD_BINOP_BR (MINT_BLT_UN_I4_S, LOCAL_VALUE_I4, (guint32)val1->i < (guint32)val2->i);
+               INTERP_FOLD_BINOP_BR (MINT_BLT_UN_I8_S, LOCAL_VALUE_I8, (guint64)val1->l < (guint64)val2->l);
 
                default:
                        return ins;
        }
+       if (td->verbose_level) {
+               g_print ("Fold binop cond br :\n\t");
+               dump_interp_inst (ins);
+       }
+
        mono_interp_stats.constant_folds++;
-       mono_interp_stats.killed_instructions += 2;
-       interp_clear_ins (sp [0].ins);
-       interp_clear_ins (sp [1].ins);
-       sp [0].val.type = STACK_VALUE_NONE;
-       sp [1].val.type = STACK_VALUE_NONE;
+       local_ref_count [sreg1]--;
+       local_ref_count [sreg2]--;
        return ins;
 }
 
-static gboolean
-interp_local_equal (StackValue *locals, int local1, int local2)
-{
-       if (local1 == local2)
-               return TRUE;
-       if (locals [local1].type == STACK_VALUE_LOCAL && locals [local1].local == local2) {
-               // local1 is a copy of local2
-               return TRUE;
-       }
-       if (locals [local2].type == STACK_VALUE_LOCAL && locals [local2].local == local1) {
-               // local2 is a copy of local1
-               return TRUE;
-       }
-       if (locals [local1].type == STACK_VALUE_I4 && locals [local2].type == STACK_VALUE_I4)
-               return locals [local1].i == locals [local2].i;
-       if (locals [local1].type == STACK_VALUE_I8 && locals [local2].type == STACK_VALUE_I8)
-               return locals [local1].l == locals [local2].l;
-       return FALSE;
-}
-
 static void
 interp_cprop (TransformData *td)
 {
-       if (!td->max_stack_height)
-               return;
-       StackContentInfo *stack = (StackContentInfo*) g_malloc (td->max_stack_height * sizeof (StackContentInfo));
-       StackContentInfo *stack_end = stack + td->max_stack_height;
-       StackContentInfo *sp;
-       StackValue *locals = (StackValue*) g_malloc (td->locals_size * sizeof (StackValue));
+       LocalValue *local_defs = (LocalValue*) g_malloc (td->locals_size * sizeof (LocalValue));
        int *local_ref_count = (int*) g_malloc (td->locals_size * sizeof (int));
        InterpBasicBlock *bb;
        gboolean needs_retry;
+       int ins_index;
 
 retry:
-       sp = stack;
        memset (local_ref_count, 0, td->locals_size * sizeof (int));
 
        if (td->verbose_level)
@@ -7551,347 +7987,207 @@ retry:
 
        for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
                InterpInst *ins;
-               // Optimizations take place only inside a single basic block
-               if (bb->stack_height >= 0) {
-                       sp = stack + bb->stack_height;
-                       g_assert (sp <= stack_end);
-                       memset (stack, 0, (sp - stack) * sizeof (StackContentInfo));
-               }
-               memset (locals, 0, td->locals_size * sizeof (StackValue));
+               ins_index = 0;
+
+               // Set cbb since we do some instruction inserting below
+               td->cbb = bb;
+
+               // FIXME This is excessive. Remove this once we have SSA
+               memset (local_defs, 0, td->locals_size * sizeof (LocalValue));
 
                if (td->verbose_level)
                        g_print ("BB%d\n", bb->index);
 
                for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
-               int pop, push;
-               // The instruction pops some values then pushes some other
-               get_inst_stack_usage (td, ins, &pop, &push);
-               if (td->verbose_level && ins->opcode != MINT_NOP) {
-                       dump_interp_inst_no_newline (ins);
-                       g_print (", sp %d, (pop %d, push %d)\n", sp - stack, pop, push);
-               }
-               if (MINT_IS_LDLOC (ins->opcode)) {
-                       int replace_op = 0;
-                       int loaded_local = ins->data [0];
-                       local_ref_count [loaded_local]++;
-                       InterpInst *prev_ins = interp_prev_ins (ins);
-                       if (prev_ins && MINT_IS_STLOC (prev_ins->opcode) && interp_local_equal (locals, prev_ins->data [0], loaded_local)) {
-                               int mt = prev_ins->opcode - MINT_STLOC_I1;
-                               if (ins->opcode - MINT_LDLOC_I1 == mt) {
-                                       if (mt == MINT_TYPE_I4)
-                                               replace_op = MINT_STLOC_NP_I4;
-                                       else if (mt == MINT_TYPE_I8)
-                                               replace_op = MINT_STLOC_NP_I8;
-                                       else if (mt == MINT_TYPE_R4)
-                                               replace_op = MINT_STLOC_NP_R4;
-                                       else if (mt == MINT_TYPE_R8)
-                                               replace_op = MINT_STLOC_NP_R8;
-                                       else if (mt == MINT_TYPE_O)
-                                               replace_op = MINT_STLOC_NP_O;
-                                       if (replace_op) {
-                                               int stored_local = prev_ins->data [0];
-                                               sp->ins = NULL;
-                                               if (sp->val.type == STACK_VALUE_NONE && !td->locals [stored_local].indirects) {
-                                                       // We know what local is on the stack now. Track it
-                                                       sp->val.type = STACK_VALUE_LOCAL;
-                                                       sp->val.local = stored_local;
-                                               }
+                       int opcode = ins->opcode;
 
-                                               // Clear the previous stloc instruction
-                                               interp_clear_ins (prev_ins);
-                                               ins->opcode = replace_op;
-                                               ins->data [0] = stored_local;
-                                               local_ref_count [loaded_local]--;
-                                               if (td->verbose_level) {
-                                                       g_print ("Add stloc.np :\n\t");
-                                                       dump_interp_inst (ins);
-                                               }
-                                               mono_interp_stats.stloc_nps++;
-                                               mono_interp_stats.killed_instructions++;
-                                       }
+                       if (opcode == MINT_NOP)
+                               continue;
+
+                       int num_sregs = mono_interp_op_sregs [opcode];
+                       int num_dregs = mono_interp_op_dregs [opcode];
+                       gint32 *sregs = &ins->sregs [0];
+                       gint32 dreg = ins->dreg;
+
+                       if (td->verbose_level && ins->opcode != MINT_NOP)
+                               dump_interp_inst (ins);
+
+                       for (int i = 0; i < num_sregs; i++) {
+                               // FIXME MINT_PROF_EXIT when void
+                               if (sregs [i] == -1)
+                                       continue;
+                               local_ref_count [sregs [i]]++;
+                               if (local_defs [sregs [i]].type == LOCAL_VALUE_LOCAL) {
+                                       int cprop_local = local_defs [sregs [i]].local;
+                                       // We are not allowed to extend the liveness of execution stack locals because
+                                       // it can end up conflicting with another such local. Once we have our
+                                       // own offset allocator for these locals, this restriction can be lifted.
+                                       if (td->locals [cprop_local].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK)
+                                               continue;
+
+                                       // We are trying to replace sregs [i] with its def local (cprop_local), but cprop_local has since been
+                                       // modified, so we can't use it.
+                                       if (local_defs [cprop_local].ins != NULL && local_defs [cprop_local].def_index > local_defs [sregs [i]].def_index)
+                                               continue;
+
+                                       if (td->verbose_level)
+                                               g_print ("cprop %d -> %d:\n\t", sregs [i], cprop_local);
+                                       local_ref_count [sregs [i]]--;
+                                       sregs [i] = cprop_local;
+                                       local_ref_count [cprop_local]++;
+                                       if (td->verbose_level)
+                                               dump_interp_inst (ins);
                                }
                        }
-                       /* If we didn't replace this ldloc with a stloc.np, try other optimizations */
-                       if (!replace_op) {
-                               if (locals [loaded_local].type == STACK_VALUE_LOCAL) {
-                                       g_assert (!td->locals [loaded_local].indirects);
-                                       // do copy propagation of the original source
-                                       mono_interp_stats.copy_propagations++;
-                                       local_ref_count [loaded_local]--;
-                                       // We can't propagate a local that has its address taken
-                                       g_assert (!td->locals [locals [loaded_local].local].indirects);
-                                       ins->data [0] = locals [loaded_local].local;
-                                       local_ref_count [ins->data [0]]++;
-                                       if (td->verbose_level) {
-                                               g_print ("cprop loc %d -> loc %d :\n\t", loaded_local, locals [loaded_local].local);
-                                               dump_interp_inst (ins);
+
+                       if (num_dregs) {
+                               local_defs [dreg].type = LOCAL_VALUE_NONE;
+                               local_defs [dreg].ins = ins;
+                               local_defs [dreg].def_index = ins_index;
+                       }
+
+                       if (opcode == MINT_MOV_4 || opcode == MINT_MOV_8 || opcode == MINT_MOV_VT) {
+                               int sreg = sregs [0];
+                               if (dreg == sreg) {
+                                       if (td->verbose_level)
+                                               g_print ("clear redundant mov\n");
+                                       interp_clear_ins (ins);
+                                       local_ref_count [sreg]--;
+                               } else if (td->locals [sreg].indirects || td->locals [dreg].indirects) {
+                                       // Don't bother with indirect locals
+                               } else if (local_defs [sreg].type == LOCAL_VALUE_I4 || local_defs [sreg].type == LOCAL_VALUE_I8) {
+                                       // Replace mov with ldc
+                                       gboolean is_i4 = local_defs [sreg].type == LOCAL_VALUE_I4;
+                                       g_assert (!td->locals [sreg].indirects);
+                                       local_defs [dreg].type = local_defs [sreg].type;
+                                       if (is_i4) {
+                                               int ct = local_defs [sreg].i;
+                                               ins = interp_get_ldc_i4_from_const (td, ins, ct, dreg);
+                                               local_defs [dreg].i = ct;
+                                       } else {
+                                               gint64 ct = local_defs [sreg].l;
+                                               ins = interp_inst_replace_with_i8_const (td, ins, ct);
+                                               local_defs [dreg].l = ct;
                                        }
-                               } else if (locals [loaded_local].type == STACK_VALUE_I4 || locals [loaded_local].type == STACK_VALUE_I8) {
-                                       gboolean is_i4 = locals [loaded_local].type == STACK_VALUE_I4;
-                                       g_assert (!td->locals [loaded_local].indirects);
-                                       if (is_i4)
-                                               ins = interp_get_ldc_i4_from_const (td, ins, locals [loaded_local].i);
-                                       else
-                                               ins = interp_inst_replace_with_i8_const (td, ins, locals [loaded_local].l);
-                                       sp->ins = ins;
-                                       sp->val = locals [loaded_local];
-                                       local_ref_count [loaded_local]--;
+                                       local_defs [dreg].ins = ins;
+                                       local_ref_count [sreg]--;
                                        mono_interp_stats.copy_propagations++;
                                        if (td->verbose_level) {
-                                               g_print ("cprop loc %d -> ct :\n\t", loaded_local);
+                                               g_print ("cprop loc %d -> ct :\n\t", sreg);
                                                dump_interp_inst (ins);
                                        }
-                                       // FIXME this replace_op got ugly
-                                       replace_op = ins->opcode;
-                               }
-                       }
-                       if (!replace_op) {
-                               // Save the ldloc on the stack if it wasn't optimized away
-                               // For simplicity we don't track locals that have their address taken
-                               // since it is hard to detect instructions that change the local value.
-                               if (td->locals [loaded_local].indirects) {
-                                       sp->val.type = STACK_VALUE_NONE;
-                               } else {
-                                       sp->val.type = STACK_VALUE_LOCAL;
-                                       sp->val.local = ins->data [0];
-                               }
-                               sp->ins = ins;
-                       }
-                       sp++;
-               } else if (MINT_IS_STLOC (ins->opcode)) {
-                       int dest_local = ins->data [0];
-                       sp--;
-                       if (sp->val.type == STACK_VALUE_LOCAL) {
-                               int src_local = sp->val.local;
-                               if (td->locals [src_local].mt == td->locals [dest_local].mt) {
-                                       // The locals have the same type. We can propagate the value
-                                       int vtsize = (ins->opcode == MINT_STLOC_VT) ? ins->data [1] : 0;
-
-                                       if (!td->locals [dest_local].indirects) {
-                                               // Track what exactly is stored into local
-                                               locals [dest_local].type = STACK_VALUE_LOCAL;
-                                               locals [dest_local].local = src_local;
-                                       }
+                               } else if (local_defs [sreg].ins != NULL &&
+                                               (td->locals [sreg].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK) &&
+                                               !(td->locals [sreg].flags & INTERP_LOCAL_FLAG_CALL_ARGS) &&
+                                               !(td->locals [dreg].flags & INTERP_LOCAL_FLAG_EXECUTION_STACK) &&
+                                               interp_prev_ins (ins) == local_defs [sreg].ins) {
+                                       // hackish temporary optimization that won't be necessary in the future
+                                       // We replace `local1 <- ?, local2 <- local1` with `local2 <- ?, local1 <- local2`
+                                       // if local1 is an execution stack local and local2 is a normal global local. This makes
+                                       // it more likely for `local1 <- local2` to be killed, while before we always needed
+                                       // to store to the global local, which is likely accessed by other instructions.
+                                       InterpInst *def = local_defs [sreg].ins;
+                                       int original_dreg = def->dreg;
+
+                                       def->dreg = dreg;
+                                       ins->dreg = original_dreg;
+                                       sregs [0] = dreg;
+
+                                       local_defs [dreg].type = LOCAL_VALUE_NONE;
+                                       local_defs [dreg].ins = def;
+                                       local_defs [original_dreg].type = LOCAL_VALUE_LOCAL;
+                                       local_defs [original_dreg].ins = ins;
+                                       local_defs [original_dreg].local = dreg;
+
+                                       local_ref_count [original_dreg]--;
+                                       local_ref_count [dreg]++;
 
-                                       if (sp->ins) {
-                                               // If the top of stack is not pushed by a ldloc, we are introducing a
-                                               // new dependency on the src_local since we are adding a movloc from it.
-                                               if (!MINT_IS_LDLOC (sp->ins->opcode))
-                                                       local_ref_count [src_local]++;
-                                               interp_clear_ins (sp->ins);
-                                               interp_clear_ins (ins);
-
-                                               ins = interp_insert_ins_bb (td, bb, ins, get_movloc_for_type (td->locals [src_local].mt));
-                                               ins->data [0] = src_local;
-                                               ins->data [1] = dest_local;
-                                               if (vtsize)
-                                                       ins->data [2] = vtsize;
-                                               // Clear ldloc / stloc pair and replace it with movloc superinstruction
-                                               if (td->verbose_level) {
-                                                       g_print ("Add movloc (ldloc off %d) :\n\t", sp->ins->il_offset);
-                                                       dump_interp_inst (ins);
-                                               }
-                                               mono_interp_stats.movlocs++;
-                                               mono_interp_stats.killed_instructions++;
+                                       if (td->verbose_level) {
+                                               g_print ("cprop dreg:\n\t");
+                                               dump_interp_inst (def);
+                                               g_print ("\t");
+                                               dump_interp_inst (ins);
                                        }
                                } else {
-                                       locals [dest_local].type = STACK_VALUE_NONE;
+                                       if (td->verbose_level)
+                                               g_print ("local copy %d <- %d\n", dreg, sreg);
+                                       local_defs [dreg].type = LOCAL_VALUE_LOCAL;
+                                       local_defs [dreg].local = sreg;
                                }
-                       } else if (sp->val.type == STACK_VALUE_NONE) {
-                               locals [dest_local].type = STACK_VALUE_NONE;
-                       } else {
-                               g_assert (sp->val.type == STACK_VALUE_I4 || sp->val.type == STACK_VALUE_I8);
-                               if (!td->locals [dest_local].indirects)
-                                       locals [dest_local] = sp->val;
-                       }
-                       clear_stack_content_info_for_local (stack, sp, dest_local);
-                       clear_local_content_info_for_local (locals, locals + td->locals_size, dest_local);
-               } else if (MINT_IS_LDC_I4 (ins->opcode) || ins->opcode == MINT_LDC_I8) {
-                       StackValue val;
-                       gboolean is_i8 = ins->opcode == MINT_LDC_I8;
-                       InterpInst *prev_ins = interp_prev_ins (ins);
-
-                       if (is_i8) {
-                               val.type = STACK_VALUE_I8;
-                               val.l = READ64 (&ins->data [0]);
-                       } else {
-                               val.type = STACK_VALUE_I4;
-                               val.i = interp_get_const_from_ldc_i4 (ins);
-                       }
-
-                       if (prev_ins && prev_ins->opcode == MINT_POP &&
-                                       ((is_i8 && sp->val.type == STACK_VALUE_I8 && sp->val.l == val.l) ||
-                                       (!is_i8 && sp->val.type == STACK_VALUE_I4 && sp->val.i == val.i))) {
-                               // The previous instruction pops the stack of the value we are pushing
-                               // right now. We can kill both instructions
-                               if (td->verbose_level)
-                                       g_print ("Kill redundant pop/ldc pair: pop (off %p), ldc (off %p)\n", prev_ins->il_offset, ins->il_offset);
-                               interp_clear_ins (prev_ins);
-                               interp_clear_ins (ins);
-                               mono_interp_stats.killed_instructions += 2;
-                       } else {
-                               sp->ins = ins;
-                               sp->val = val;
-                       }
-                       sp++;
-               } else if (ins->opcode == MINT_MONO_LDPTR) {
-                       StackValue val;
+                       } else if (opcode == MINT_LDLOCA_S) {
+                               // The local that we are taking the address of is not a sreg but still referenced
+                               local_ref_count [ins->sregs [0]]++;
+                       } else if (MINT_IS_LDC_I4 (opcode)) {
+                               local_defs [dreg].type = LOCAL_VALUE_I4;
+                               local_defs [dreg].i = interp_get_const_from_ldc_i4 (ins);
+                       } else if (opcode == MINT_LDC_I8) {
+                               local_defs [dreg].type = LOCAL_VALUE_I8;
+                               local_defs [dreg].l = READ64 (&ins->data [0]);
+                       } else if (ins->opcode == MINT_MONO_LDPTR) {
 #if SIZEOF_VOID_P == 8
-                       val.type = STACK_VALUE_I8;
-                       val.l = (gint64)td->data_items [ins->data [0]];
+                               local_defs [dreg].type = LOCAL_VALUE_I8;
+                               local_defs [dreg].l = (gint64)td->data_items [ins->data [0]];
 #else
-                       val.type = STACK_VALUE_I4;
-                       val.i = (gint32)td->data_items [ins->data [0]];
+                               local_defs [dreg].type = LOCAL_VALUE_I4;
+                               local_defs [dreg].i = (gint32)td->data_items [ins->data [0]];
 #endif
-                       sp->ins = ins;
-                       sp->val = val;
-                       sp++;
-               } else if (MINT_IS_MOVLOC (ins->opcode)) {
-                       int src_local = ins->data [0];
-                       int dest_local = ins->data [1];
-                       local_ref_count [src_local]++;
-                       if (!td->locals [dest_local].indirects) {
-                               if (locals [src_local].type != STACK_VALUE_NONE) {
-                                       locals [dest_local] = locals [src_local];
-                               } else {
-                                       locals [dest_local].type = STACK_VALUE_LOCAL;
-                                       locals [dest_local].local = src_local;
+                       } else if (MINT_IS_UNOP (opcode) || (opcode >= MINT_MOV_I1 && opcode <= MINT_MOV_U2)) {
+                               ins = interp_fold_unop (td, local_defs, local_ref_count, ins);
+                       } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
+                               ins = interp_fold_unop_cond_br (td, bb, local_defs, local_ref_count, ins);
+                       } else if (MINT_IS_BINOP (opcode)) {
+                               ins = interp_fold_binop (td, local_defs, local_ref_count, ins);
+                       } else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
+                               ins = interp_fold_binop_cond_br (td, bb, local_defs, local_ref_count, ins);
+                       } else if ((ins->opcode == MINT_NEWOBJ_FAST || ins->opcode == MINT_NEWOBJ_VT_FAST) && ins->data [0] == INLINED_METHOD_FLAG) {
+                               // FIXME Drop the CALL_ARGS flag on the params so this will no longer be necessary
+                               int param_count = ins->data [3];
+                               int *newobj_reg_map = ins->info.newobj_reg_map;
+                               for (int i = 0; i < param_count; i++) {
+                                       int src = newobj_reg_map [2 * i];
+                                       int dst = newobj_reg_map [2 * i + 1];
+                                       local_defs [dst] = local_defs [src];
+                                       local_defs [dst].ins = NULL;
                                }
-                               clear_stack_content_info_for_local (stack, sp, dest_local);
-                               clear_local_content_info_for_local (locals, locals + td->locals_size, dest_local);
-                       }
-               } else if (MINT_IS_STLOC_NP (ins->opcode)) {
-                       int dest_local = ins->data [0];
-                       // Prevent optimizing away the instruction that pushed the value on the stack
-                       sp [-1].ins = NULL;
-                       // The local contains the value of the top of stack
-                       if (!td->locals [dest_local].indirects) {
-                               locals [dest_local] = sp [-1].val;
-                               clear_stack_content_info_for_local (stack, sp, dest_local);
-                               clear_local_content_info_for_local (locals, locals + td->locals_size, dest_local);
-                       }
-               } else if (ins->opcode == MINT_DUP || ins->opcode == MINT_DUP_VT) {
-                       sp [0].val = sp [-1].val;
-                       sp [0].ins = ins;
-                       // If top of stack is known, we could also replace dup with an explicit
-                       // propagated instruction, so we remove the top of stack dependency
-                       sp [-1].ins = NULL;
-                       sp++;
-               } else if (ins->opcode == MINT_BOX_PTR || ins->opcode == MINT_BOX_NULLABLE_PTR) {
-                       // These opcodes violate the stack based design, just clear the whole stack
-                       // for now since we will get rid of the stack design anyway.
-                       memset (stack, 0, (sp - stack) * sizeof (StackContentInfo));
-               } else if (ins->opcode == MINT_CKNULL_N) {
-                       // This opcode violates the stack based design, just clear the whole stack
-                       // for now since we will get rid of the stack design anyway.
-                       for (StackContentInfo *spit = stack; spit < sp; spit++)
-                               spit->ins = NULL;
-               } else if (ins->opcode == MINT_POP || ins->opcode == MINT_POP_VT) {
-                       sp--;
-                       if (sp->ins) {
-                               // The top of the stack is not used by any instructions. Kill both the
-                               // instruction that pushed it and the pop.
-                               interp_clear_ins (sp->ins);
-                               interp_clear_ins (ins);
-                               mono_interp_stats.killed_instructions += 2;
-                               // The value pop-ed by this instruction can still be accessed. If we also
-                               // kill the instruction pushing the value, then we need to empty the
-                               // value of the stack, so it is not considered for further optimizations.
-                               sp->val.type = STACK_VALUE_NONE;
-                       }
-               } else if ((ins->opcode == MINT_NEWOBJ_FAST || ins->opcode == MINT_NEWOBJ_VT_FAST) && ins->data [0] == INLINED_METHOD_FLAG) {
-                       int param_count = ins->data [3];
-                       // memmove the stack values while clearing ins, to prevent instruction removal
-                       for (int i = 1; i <= param_count; i++) {
-                               sp [-i + 2] = sp [-i];
-                               sp [-i + 2].ins = NULL;
-                       }
-                       // clear stack information for the slots where the allocated object resides
-                       memset (&sp [-param_count], 0, 2 * sizeof (StackContentInfo));
-                       sp += 2;
-               } else if (ins->opcode == MINT_CASTCLASS || ins->opcode == MINT_CASTCLASS_COMMON || ins->opcode == MINT_CASTCLASS_INTERFACE) {
-                       // Keep the value on the stack, but prevent optimizing away
-                       sp [-1].ins = NULL;
-               } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (ins->opcode)) {
-                       ins = interp_fold_unop_cond_br (td, bb, sp, ins);
-                       sp--;
-               } else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (ins->opcode)) {
-                       ins = interp_fold_binop_cond_br (td, bb, sp, ins);
-                       sp -= 2;
-               } else if (MINT_IS_UNOP (ins->opcode)) {
-                       ins = interp_fold_unop (td, sp, ins);
-               } else if (MINT_IS_BINOP (ins->opcode)) {
-                       ins = interp_fold_binop (td, sp, ins);
-                       sp--;
-               } else if (ins->opcode == MINT_LDLOCA_S && ins->next && MINT_IS_LDFLD (ins->next->opcode) &&
-                               td->locals [ins->data [0]].mt == (ins->next->opcode - MINT_LDFLD_I1) &&
-                               ins->next->data [0] == 0) {
-                       int mt = ins->next->opcode - MINT_LDFLD_I1;
-                       int local = ins->data [0];
-                       // Replace LDLOCA + LDFLD with LDLOC, when the storing field represents
-                       // the entire local. This is the case with storing to the only field of
-                       // an IntPtr. We don't handle value type loads.
-                       ins->next->opcode = MINT_LDLOC_I1 + mt;
-                       ins->next->data [0] = local;
-                       td->locals [local].indirects--;
-                       interp_clear_ins (ins);
-                       mono_interp_stats.killed_instructions++;
-                       mono_interp_stats.ldlocas_removed++;
-                       if (td->verbose_level) {
-                               g_print ("Replace ldloca/ldfld pair :\n\t");
-                               dump_interp_inst (ins->next);
-                       }
-               } else if (ins->opcode >= MINT_STFLD_I1 && ins->opcode <= MINT_STFLD_O) {
-                       StackContentInfo *src = &sp [-2];
-                       if (src->ins) {
-                               if (src->ins->opcode == MINT_LDLOCA_S && td->locals [src->ins->data [0]].mt == (ins->opcode - MINT_STFLD_I1) &&
-                                               ins->data [0] == 0) {
+                       } else if (MINT_IS_LDFLD (opcode) && ins->data [0] == 0) {
+                               InterpInst *ldloca = local_defs [sregs [0]].ins;
+                               if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S &&
+                                               td->locals [ldloca->sregs [0]].mt == (ins->opcode - MINT_LDFLD_I1)) {
+                                       int mt = ins->opcode - MINT_LDFLD_I1;
+                                       int local = ldloca->sregs [0];
+                                       // Replace LDLOCA + LDFLD with LDLOC, when the loading field represents
+                                       // the entire local. This is the case with loading the only field of an
+                                       // IntPtr. We don't handle value type loads.
+                                       ins->opcode = get_mov_for_type (mt, TRUE);
+                                       // The dreg of the MOV is the same as the dreg of the LDFLD
+                                       local_ref_count [sregs [0]]--;
+                                       sregs [0] = local;
+
+                                       if (td->verbose_level) {
+                                               g_print ("Replace ldloca/ldfld pair :\n\t");
+                                               dump_interp_inst (ins->next);
+                                       }
+                               }
+                       } else if (MINT_IS_STFLD (opcode) && ins->data [0] == 0) {
+                               InterpInst *ldloca = local_defs [sregs [0]].ins;
+                               if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S &&
+                                               td->locals [ldloca->sregs [0]].mt == (ins->opcode - MINT_STFLD_I1)) {
                                        int mt = ins->opcode - MINT_STFLD_I1;
-                                       int local = src->ins->data [0];
-                                       interp_clear_ins (src->ins);
-                                       ins->opcode = MINT_STLOC_I1 + mt;
-                                       ins->data [0] = local;
-                                       td->locals [local].indirects--;
-                                       mono_interp_stats.killed_instructions++;
-                                       mono_interp_stats.ldlocas_removed++;
-                                       // FIXME Update stack contents for stloc, we currently rely on cprop running again.
-                                       clear_stack_content_info_for_local (stack, sp, local);
-                                       clear_local_content_info_for_local (locals, locals + td->locals_size, local);
+                                       int local = ldloca->sregs [0];
+
+                                       ins->opcode = get_mov_for_type (mt, FALSE);
+                                       // The sreg of the MOV is the same as the second sreg of the STFLD
+                                       local_ref_count [sregs [0]]--;
+                                       ins->dreg = local;
+                                       sregs [0] = sregs [1];
 
                                        if (td->verbose_level) {
-                                               g_print ("Replace ldloca/stfld pair (off %p) :\n\t", src->ins->il_offset);
+                                               g_print ("Replace ldloca/stfld pair (off %p) :\n\t", ldloca->il_offset);
                                                dump_interp_inst (ins);
                                        }
-                               } else if (src->val.type == STACK_VALUE_LOCAL && (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS)) {
-                                       int loc_index = src->val.local;
-                                       int fld_offset = ins->data [0];
-                                       int mt = ins->opcode - MINT_STFLD_I1;
-                                       ins = interp_insert_ins_bb (td, bb, ins, MINT_STLOCFLD_I1 + mt);
-                                       ins->data [0] = loc_index;
-                                       ins->data [1] = fld_offset;
-                                       local_ref_count [loc_index]++;
-                                       interp_clear_ins (ins->prev);
-                                       interp_clear_ins (src->ins);
-                                       mono_interp_stats.super_instructions++;
-                                       mono_interp_stats.killed_instructions++;
                                }
                        }
-                       sp -= 2;
-               } else if (MINT_IS_STLOCFLD (ins->opcode)) {
-                       local_ref_count [ins->data [0]]++;
-                       sp--;
-               } else {
-                       if (pop == MINT_POP_ALL)
-                               pop = sp - stack;
-                       sp += push - pop;
-                       g_assert (sp >= stack && sp <= stack_end);
-                       g_assert ((sp - push) >= stack && (sp - push) <= stack_end);
-                       memset (sp - push, 0, push * sizeof (StackContentInfo));
-                       // If this instruction only pushes a single value, make it a candidate for
-                       // removal, if its value is not used anywhere.
-                       if (push == 1 && pop == 0 && !MINT_IS_CALL (ins->opcode) && !MINT_IS_NEWOBJ (ins->opcode))
-                               sp [-1].ins = ins;
-               }
+                       ins_index++;
                }
        }
 
@@ -7902,8 +8198,7 @@ retry:
        if (needs_retry)
                goto retry;
 
-       g_free (stack);
-       g_free (locals);
+       g_free (local_defs);
        g_free (local_ref_count);
 }
 
@@ -7916,53 +8211,7 @@ mono_test_interp_cprop (TransformData *td)
 static void
 interp_super_instructions (TransformData *td)
 {
-       InterpBasicBlock *bb;
-       for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
-               InterpInst *ins;
-               InterpInst *prev1_ins = NULL;
-               InterpInst *prev2_ins = NULL;
-               for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
-               if (ins->opcode == MINT_NOP)
-                       continue;
-               if (ins->opcode >= MINT_LDFLD_I1 && ins->opcode <= MINT_LDFLD_O && prev1_ins) {
-                       if (prev1_ins->opcode == MINT_LDLOC_O) {
-                               int loc_index = prev1_ins->data [0];
-                               int fld_offset = ins->data [0];
-                               int mt = ins->opcode - MINT_LDFLD_I1;
-                               ins = interp_insert_ins_bb (td, bb, ins, MINT_LDLOCFLD_I1 + mt);
-                               ins->data [0] = loc_index;
-                               ins->data [1] = fld_offset;
-                               interp_clear_ins (ins->prev);
-                               interp_clear_ins (prev1_ins);
-                               prev1_ins = NULL;
-                               mono_interp_stats.super_instructions++;
-                               mono_interp_stats.killed_instructions++;
-                       }
-               } else if (MINT_IS_STLOC (ins->opcode) && prev1_ins && prev2_ins) {
-                       if (prev1_ins->opcode == MINT_ADD1_I4 || prev1_ins->opcode == MINT_ADD1_I8 ||
-                               prev1_ins->opcode == MINT_SUB1_I4 || prev1_ins->opcode == MINT_SUB1_I8) {
-                               if (MINT_IS_LDLOC (prev2_ins->opcode) && prev2_ins->data [0] == ins->data [0]) {
-                                       if (prev1_ins->opcode == MINT_ADD1_I4)
-                                               ins->opcode = MINT_LOCADD1_I4;
-                                       else if (prev1_ins->opcode == MINT_ADD1_I8)
-                                               ins->opcode = MINT_LOCADD1_I8;
-                                       else if (prev1_ins->opcode == MINT_SUB1_I4)
-                                               ins->opcode = MINT_LOCSUB1_I4;
-                                       else
-                                               ins->opcode = MINT_LOCSUB1_I8;
-                                       // the local index is already set inside the replaced STLOC instruction
-                                       interp_clear_ins (prev1_ins);
-                                       interp_clear_ins (prev2_ins);
-                                       prev1_ins = NULL;
-                                       mono_interp_stats.super_instructions++;
-                                       mono_interp_stats.killed_instructions += 2;
-                               }
-                       }
-               }
-               prev2_ins = prev1_ins;
-               prev1_ins = ins;
-               }
-       }
+       // FIXME This pass is currently a no-op; add super instructions for the local var based IR
 }
 
 static void
@@ -8105,7 +8354,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
                g_print ("Runtime method: %s %p\n", mono_method_full_name (method, TRUE), rtm);
                g_print ("Locals size %d, stack size: %d\n", td->total_locals_size, td->max_stack_size);
                g_print ("Calculated stack height: %d, stated height: %d\n", td->max_stack_height, header->max_stack);
-               dump_mint_code (td->new_code, td->new_code_end);
+               dump_interp_code (td->new_code, td->new_code_end);
        }
 
        /* Check if we use excessive stack space */
index bd8ec7b..2e3f631 100644 (file)
@@ -11,6 +11,8 @@
 #define INTERP_INST_FLAG_RECORD_CALL_PATCH 16
 
 #define INTERP_LOCAL_FLAG_DEAD 1
+#define INTERP_LOCAL_FLAG_EXECUTION_STACK 2
+#define INTERP_LOCAL_FLAG_CALL_ARGS 4
 
 typedef struct _InterpInst InterpInst;
 typedef struct _InterpBasicBlock InterpBasicBlock;
@@ -20,40 +22,37 @@ typedef struct
        MonoClass *klass;
        unsigned char type;
        unsigned char flags;
+       /*
+        * The local associated with the value of this stack entry. Every time we push on
+        * the stack a new local is created.
+        */
+       int local;
        /* The offset from the execution stack start where this is stored */
        int offset;
        /* Saves how much stack this is using. It is a multiple of MINT_VT_ALIGNMENT */
        int size;
 } StackInfo;
 
-#define STACK_VALUE_NONE 0
-#define STACK_VALUE_LOCAL 1
-#define STACK_VALUE_I4 2
-#define STACK_VALUE_I8 3
+#define LOCAL_VALUE_NONE 0
+#define LOCAL_VALUE_LOCAL 1
+#define LOCAL_VALUE_I4 2
+#define LOCAL_VALUE_I8 3
 
-// StackValue contains data to construct an InterpInst that is equivalent with the contents
+// LocalValue contains data to construct an InterpInst that is equivalent with the contents
 // of the stack slot / local / argument.
 typedef struct {
-       // Indicates the type of the stored information. It can be a local, argument or a constant
+       // Indicates the type of the stored information. It can be another local or a constant
        int type;
        // Holds the local index or the actual constant value
        union {
                int local;
-               int arg;
                gint32 i;
                gint64 l;
        };
-} StackValue;
-
-typedef struct
-{
-       // This indicates what is currently stored in this stack slot. This can be a constant
-       // or the copy of a local / argument.
-       StackValue val;
-       // The instruction that pushed this stack slot. If ins is null, we can't remove the usage
-       // of the stack slot, because we can't clear the instruction that set it.
+       // The instruction that writes this local, or NULL when no defining instruction may be inspected or rewritten.
        InterpInst *ins;
-} StackContentInfo;
+       int def_index; // NOTE(review): presumably the index of the defining instruction within its basic block -- confirm
+} LocalValue;
 
 struct _InterpInst {
        guint16 opcode;
@@ -62,6 +61,8 @@ struct _InterpInst {
        // part of the IL instruction associated with the previous interp instruction.
        int il_offset;
        guint32 flags;
+       gint32 dreg;
+       gint32 sregs [3]; // Currently all instructions have at most 3 sregs
        // This union serves the same purpose as the data array. The difference is that
        // the data array maps exactly to the final representation of the instruction.
        // FIXME We should consider using a separate higher level IR, that is also easier
@@ -69,7 +70,12 @@ struct _InterpInst {
        union {
                InterpBasicBlock *target_bb;
                InterpBasicBlock **target_bb_table;
+               // We handle newobj poorly due to not having our own local offset allocator.
+               // We temporarily use this array to let cprop know the values of the newobj args.
+               int *newobj_reg_map;
        } info;
+       // Variable data immediately following the dreg/sreg information. This is represented exactly
+       // in the final code stream as in this array.
        guint16 data [MONO_ZERO_LEN_ARRAY];
 };
 
@@ -115,6 +121,8 @@ typedef enum {
 
 typedef struct {
        RelocType type;
+       /* For branch relocation, how many sreg slots to skip */
+       int skip;
        /* In the interpreter IR */
        int offset;
        InterpBasicBlock *target_bb;
@@ -127,6 +135,10 @@ typedef struct {
        int indirects;
        int offset;
        int size;
+       union {
+               // the offset from the start of the execution stack locals space
+               int stack_offset;
+       };
 } InterpLocal;
 
 typedef struct
index c949633..7dcadf9 100644 (file)
@@ -78,10 +78,6 @@ verify_cprop_ldloc_stloc (TransformData *td)
                return 1;
        if (expect (&ins, NULL, MINT_CALL))
                return 2;
-       if (expect (&ins, NULL, MINT_STLOC_NP_I4))
-               return 3;
-       if (expect (&ins, NULL, MINT_LDLOC_I4))
-               return 4;
        if (expect (&ins, NULL, MINT_ADD_I4))
                return 5;
        if (expect (&ins, NULL, MINT_RET))