[interp] Add a few super instructions (#50361)
authorVlad Brezae <brezaevlad@gmail.com>
Tue, 30 Mar 2021 11:23:03 +0000 (14:23 +0300)
committerGitHub <noreply@github.com>
Tue, 30 Mar 2021 11:23:03 +0000 (14:23 +0300)
* [interp] Fix PROFILE_INTERP build

* [interp] Add ldc.i8.0 opcode, replacing ldnull on 64bit

We still need to use this opcode in more places, instead of the generic ldc.i8

* [interp] Add a few super instructions

We depend on the cprop pass to init local_ref_count. We only add a super instruction if the definition of an intermediary result is done in the same basic block (the dreg is a local var) and it is not used anywhere else; otherwise we can't clear the defining instruction.

* [interp] Improve the marvin block intrinsic

This also removes 2 additional ldloca instructions and enables cprop for these vars, which no longer have their address taken.

src/mono/mono/mini/interp/interp.c
src/mono/mono/mini/interp/mintops.def
src/mono/mono/mini/interp/mintops.h
src/mono/mono/mini/interp/transform.c
src/mono/mono/mini/interp/transform.h

index 9c0dd1d..1fa5e26 100644 (file)
@@ -3205,10 +3205,6 @@ main_loop:
                        ++ip;
                        mono_break ();
                        MINT_IN_BREAK;
-               MINT_IN_CASE(MINT_LDNULL)
-                       LOCAL_VAR (ip [1], gpointer) = NULL;
-                       ip += 2;
-                       MINT_IN_BREAK;
                MINT_IN_CASE(MINT_INIT_ARGLIST) {
                        const guint16 *call_ip = frame->parent->state.ip - 6;
                        g_assert_checked (*call_ip == MINT_CALL_VARARG);
@@ -3266,6 +3262,10 @@ main_loop:
                        LOCAL_VAR (ip [1], gint32) = READ32 (ip + 2);
                        ip += 4;
                        MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_LDC_I8_0)
+                       LOCAL_VAR (ip [1], gint64) = 0;
+                       ip += 2;
+                       MINT_IN_BREAK;
                MINT_IN_CASE(MINT_LDC_I8)
                        LOCAL_VAR (ip [1], gint64) = READ64 (ip + 2);
                        ip += 6;
@@ -3624,6 +3624,12 @@ call:
                MINT_IN_CASE(MINT_RET)
                        frame->retval [0] = LOCAL_VAR (ip [1], stackval);
                        goto exit_frame;
+               MINT_IN_CASE(MINT_RET_I4_IMM)
+                       frame->retval [0].data.i = (gint16)ip [1];
+                       goto exit_frame;
+               MINT_IN_CASE(MINT_RET_I8_IMM)
+                       frame->retval [0].data.l = (gint16)ip [1];
+                       goto exit_frame;
                MINT_IN_CASE(MINT_RET_VOID)
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VT) {
@@ -4324,10 +4330,18 @@ call:
                        LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + 1;
                        ip += 3;
                        MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_ADD_I4_IMM)
+                       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) + (gint16)ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
                MINT_IN_CASE(MINT_ADD1_I8)
                        LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + 1;
                        ip += 3;
                        MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_ADD_I8_IMM)
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) + (gint16)ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
                MINT_IN_CASE(MINT_SUB_I4)
                        BINOP(gint32, -);
                        MINT_IN_BREAK;
@@ -4491,6 +4505,30 @@ call:
                MINT_IN_CASE(MINT_SHR_UN_I8)
                        SHIFTOP(guint64, >>);
                        MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHL_I4_IMM)
+                       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) << ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHL_I8_IMM)
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) << ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHR_I4_IMM)
+                       LOCAL_VAR (ip [1], gint32) = LOCAL_VAR (ip [2], gint32) >> ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHR_I8_IMM)
+                       LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) >> ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHR_UN_I4_IMM)
+                       LOCAL_VAR (ip [1], guint32) = LOCAL_VAR (ip [2], guint32) >> ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
+               MINT_IN_CASE(MINT_SHR_UN_I8_IMM)
+                       LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64) >> ip [3];
+                       ip += 4;
+                       MINT_IN_BREAK;
                MINT_IN_CASE(MINT_NEG_I4)
                        LOCAL_VAR (ip [1], gint32) = - LOCAL_VAR (ip [2], gint32);
                        ip += 3;
@@ -4858,7 +4896,7 @@ call:
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_INTRINS_MARVIN_BLOCK) {
-                       interp_intrins_marvin_block (LOCAL_VAR (ip [1], guint32*), LOCAL_VAR (ip [2], guint32*));
+                       interp_intrins_marvin_block ((guint32*)(locals + ip [1]), (guint32*)(locals + ip [2]));
                        ip += 3;
                        MINT_IN_BREAK;
                }
@@ -7184,7 +7222,7 @@ imethod_opcount_comparer (gconstpointer m1, gconstpointer m2)
 static void
 interp_print_method_counts (void)
 {
-       MonoJitMemoryManager *jit_mm = jit_mm_for_method (method);
+       MonoJitMemoryManager *jit_mm = get_default_jit_mm ();
 
        jit_mm_lock (jit_mm);
        imethods = (InterpMethod**) malloc (jit_mm->interp_code_hash.num_entries * sizeof (InterpMethod*));
index 03f5184..b14bd64 100644 (file)
@@ -14,7 +14,6 @@ OPDEF(MINT_DEF, "def", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_DUMMY_USE, "dummy_use", 2, 0, 1, MintOpNoArgs)
 OPDEF(MINT_BREAK, "break", 1, 0, 0, MintOpNoArgs)
 OPDEF(MINT_BREAKPOINT, "breakpoint", 1, 0, 0, MintOpNoArgs)
-OPDEF(MINT_LDNULL, "ldnull", 2, 1, 0, MintOpNoArgs)
 
 OPDEF(MINT_RET, "ret", 2, 0, 1, MintOpNoArgs)
 OPDEF(MINT_RET_VOID, "ret.void", 1, 0, 0, MintOpNoArgs)
@@ -33,11 +32,12 @@ OPDEF(MINT_LDC_I4_5, "ldc.i4.5", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_6, "ldc.i4.6", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_7, "ldc.i4.7", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I4_8, "ldc.i4.8", 2, 1, 0, MintOpNoArgs)
-
 OPDEF(MINT_LDC_I4_S, "ldc.i4.s", 3, 1, 0, MintOpShortInt)
 OPDEF(MINT_LDC_I4, "ldc.i4", 4, 1, 0, MintOpInt)
-OPDEF(MINT_LDC_I8, "ldc.i8", 6, 1, 0, MintOpLongInt)
+
+OPDEF(MINT_LDC_I8_0, "ldc.i8.0", 2, 1, 0, MintOpNoArgs)
 OPDEF(MINT_LDC_I8_S, "ldc.i8.s", 3, 1, 0, MintOpShortInt)
+OPDEF(MINT_LDC_I8, "ldc.i8", 6, 1, 0, MintOpLongInt)
 
 OPDEF(MINT_LDC_R4, "ldc.r4", 4, 1, 0, MintOpFloat)
 OPDEF(MINT_LDC_R8, "ldc.r8", 6, 1, 0, MintOpDouble)
@@ -422,6 +422,7 @@ OPDEF(MINT_REM_R8, "rem.r8", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_REM_UN_I4, "rem.un.i4", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_REM_UN_I8, "rem.un.i8", 4, 1, 2, MintOpNoArgs)
 
+// Shifts, keep in order with imm versions
 OPDEF(MINT_SHR_UN_I4, "shr.un.i4", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_SHR_UN_I8, "shr.un.i8", 4, 1, 2, MintOpNoArgs)
 OPDEF(MINT_SHL_I4, "shl.i4", 4, 1, 2, MintOpNoArgs)
@@ -595,6 +596,21 @@ OPDEF(MINT_CONV_OVF_U8_R8, "conv.ovf.u8.r8", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_CEQ0_I4, "ceq0.i4", 3, 1, 1, MintOpNoArgs)
 /* unops end */
 
+/* super instructions */
+OPDEF(MINT_RET_I4_IMM, "ret.i4.imm", 2, 0, 0, MintOpShortInt)
+OPDEF(MINT_RET_I8_IMM, "ret.i8.imm", 2, 0, 0, MintOpShortInt)
+
+OPDEF(MINT_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, MintOpShortInt)
+
+OPDEF(MINT_SHR_UN_I4_IMM, "shr.un.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_SHR_UN_I8_IMM, "shr.un.i8.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_SHL_I4_IMM, "shl.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_SHL_I8_IMM, "shl.i8.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_SHR_I4_IMM, "shr.i4.imm", 4, 1, 1, MintOpShortInt)
+OPDEF(MINT_SHR_I8_IMM, "shr.i8.imm", 4, 1, 1, MintOpShortInt)
+
+
 OPDEF(MINT_CKFINITE, "ckfinite", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_MKREFANY, "mkrefany", 4, 1, 1, MintOpClassToken)
 OPDEF(MINT_REFANYTYPE, "refanytype", 3, 1, 1, MintOpNoArgs)
index f2ae6df..15254ba 100644 (file)
@@ -61,8 +61,10 @@ typedef enum {
 #define MINT_IS_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_JIT_CALL)
 #define MINT_IS_PATCHABLE_CALL(op) ((op) >= MINT_CALL && (op) <= MINT_VCALL)
 #define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_M1 && (op) <= MINT_LDC_I4)
+#define MINT_IS_LDC_I8(op) ((op) >= MINT_LDC_I8_0 && (op) <= MINT_LDC_I8)
 #define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CEQ0_I4)
 #define MINT_IS_BINOP(op) ((op) >= MINT_ADD_I4 && (op) <= MINT_CLT_UN_R8)
+#define MINT_IS_BINOP_SHIFT(op) ((op) >= MINT_SHR_UN_I4 && (op) <= MINT_SHR_I8)
 #define MINT_IS_LDFLD(op) ((op) >= MINT_LDFLD_I1 && (op) <= MINT_LDFLD_O)
 #define MINT_IS_STFLD(op) ((op) >= MINT_STFLD_I1 && (op) <= MINT_STFLD_O)
 
index 31dc24e..0501d06 100644 (file)
@@ -136,6 +136,12 @@ MonoInterpStats mono_interp_stats;
 #define MINT_MOV_P MINT_MOV_4
 #endif
 
+#if SIZEOF_VOID_P == 8 // MINT_LDNULL is an alias for the zero-constant load whose width matches the pointer size
+#define MINT_LDNULL MINT_LDC_I8_0 // 8-byte pointers: use the 64-bit zero load
+#else
+#define MINT_LDNULL MINT_LDC_I4_0 // otherwise: use the 32-bit zero load
+#endif
+
 typedef struct {
        const gchar *op_name;
        guint16 insn [3];
@@ -425,6 +431,7 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size)
        td->locals [td->locals_size].size = size;
        td->locals [td->locals_size].live_start = -1;
        td->locals [td->locals_size].bb_index = -1;
+       td->locals [td->locals_size].def = NULL;
        td->locals_size++;
        return td->locals_size - 1;
 
@@ -1580,6 +1587,18 @@ interp_get_const_from_ldc_i4 (InterpInst *ins)
        }
 }
 
+static gint64
+interp_get_const_from_ldc_i8 (InterpInst *ins) // Returns the 64-bit constant that the given ldc.i8* instruction loads.
+{
+       switch (ins->opcode) {
+       case MINT_LDC_I8_0: return 0; // the constant is implied by the opcode, no data is read
+       case MINT_LDC_I8_S: return (gint64)(gint16)ins->data [0]; // data [0] is reinterpreted as signed 16-bit, then widened
+       case MINT_LDC_I8: return READ64 (&ins->data [0]); // the value is read as 64 bits starting at data [0]
+       default:
+               g_assert_not_reached (); // only the three opcodes above are handled; anything else asserts
+       }
+}
+
 /* If ins is not null, it will replace it with the ldc */
 static InterpInst*
 interp_get_ldc_i4_from_const (TransformData *td, InterpInst *ins, gint32 ct, int dreg)
@@ -2071,8 +2090,37 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
                g_assert (!strcmp (tm, "get_Value"));
                *op = MINT_LDIND_I;
        } else if (in_corlib && !strcmp (klass_name_space, "System") && !strcmp (klass_name, "Marvin")) {
-               if (!strcmp (tm, "Block"))
-                       *op = MINT_INTRINS_MARVIN_BLOCK;
+               if (!strcmp (tm, "Block")) {
+                       InterpInst *ldloca2 = td->last_ins;
+                       if (ldloca2 != NULL && ldloca2->opcode == MINT_LDLOCA_S) {
+                               InterpInst *ldloca1 = interp_prev_ins (ldloca2);
+                               if (ldloca1 != NULL && ldloca1->opcode == MINT_LDLOCA_S) {
+                                       interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK);
+                                       td->last_ins->sregs [0] = ldloca1->sregs [0];
+                                       td->last_ins->sregs [1] = ldloca2->sregs [0];
+
+                                       // This intrinsic would normally receive two local refs, however, we try optimizing
+                                       // away both ldlocas for better codegen. This means that this intrinsic will instead
+                                       // modify the values of both sregs. In order to not overcomplicate the optimization
+                                       // passes and offset allocator with support for modifiable sregs or multi dregs, we
+                                       // just redefine both sregs after the intrinsic.
+                                       interp_add_ins (td, MINT_DEF);
+                                       td->last_ins->dreg = ldloca1->sregs [0];
+                                       interp_add_ins (td, MINT_DEF);
+                                       td->last_ins->dreg = ldloca2->sregs [0];
+
+                                       // Remove the ldlocas
+                                       td->locals [ldloca1->sregs [0]].indirects--;
+                                       td->locals [ldloca2->sregs [0]].indirects--;
+                                       mono_interp_stats.ldlocas_removed += 2;
+                                       interp_clear_ins (ldloca1);
+                                       interp_clear_ins (ldloca2);
+                                       td->sp -= 2;
+                                       td->ip += 5;
+                                       return TRUE;
+                               }
+                       }
+               }
        } else if (in_corlib && !strcmp (klass_name_space, "System.Runtime.InteropServices") && !strcmp (klass_name, "MemoryMarshal")) {
                if (!strcmp (tm, "GetArrayDataReference"))
                        *op = MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF;
@@ -3796,6 +3844,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
                td->locals [i].flags = INTERP_LOCAL_FLAG_GLOBAL;
                td->locals [i].indirects = 0;
                td->locals [i].mt = mt;
+               td->locals [i].def = NULL;
                if (mt == MINT_TYPE_VT) {
                        size = mono_type_size (type, &align);
                        td->locals [i].size = size;
@@ -3824,6 +3873,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
                td->locals [index].flags = INTERP_LOCAL_FLAG_GLOBAL;
                td->locals [index].indirects = 0;
                td->locals [index].mt = mint_type (header->locals [i]);
+               td->locals [index].def = NULL;
                if (td->locals [index].mt == MINT_TYPE_VT)
                        td->locals [index].size = size;
                else
@@ -4592,7 +4642,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                        td->ip += 2;
                        break;
                }
-               case CEE_LDNULL: 
+               case CEE_LDNULL:
                        interp_add_ins (td, MINT_LDNULL);
                        push_type (td, STACK_TYPE_O, NULL);
                        interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
@@ -7672,8 +7722,9 @@ interp_optimize_bblocks (TransformData *td)
 }
 
 static gboolean
-interp_local_deadce (TransformData *td, int *local_ref_count)
+interp_local_deadce (TransformData *td)
 {
+       int *local_ref_count = td->local_ref_count;
        gboolean needs_dce = FALSE;
        gboolean needs_cprop = FALSE;
 
@@ -7697,7 +7748,7 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
                for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
                        if (MINT_IS_MOV (ins->opcode) ||
                                        MINT_IS_LDC_I4 (ins->opcode) ||
-                                       ins->opcode == MINT_LDC_I8 ||
+                                       MINT_IS_LDC_I8 (ins->opcode) ||
                                        ins->opcode == MINT_MONO_LDPTR ||
                                        ins->opcode == MINT_LDLOCA_S) {
                                int dreg = ins->dreg;
@@ -7749,8 +7800,9 @@ interp_local_deadce (TransformData *td, int *local_ref_count)
                break;
 
 static InterpInst*
-interp_fold_unop (TransformData *td, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
+interp_fold_unop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
 {
+       int *local_ref_count = td->local_ref_count;
        // ins should be an unop, therefore it should have a single dreg and a single sreg
        int dreg = ins->dreg;
        int sreg = ins->sregs [0];
@@ -7861,8 +7913,9 @@ interp_fold_unop (TransformData *td, LocalValue *local_defs, int *local_ref_coun
                break;
 
 static InterpInst*
-interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
+interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, InterpInst *ins)
 {
+       int *local_ref_count = td->local_ref_count;
        // ins should be an unop conditional branch, therefore it should have a single sreg
        int sreg = ins->sregs [0];
        LocalValue *val = &local_defs [sreg];
@@ -7922,8 +7975,9 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *
 
 
 static InterpInst*
-interp_fold_binop (TransformData *td, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
+interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
 {
+       int *local_ref_count = td->local_ref_count;
        // ins should be a binop, therefore it should have a single dreg and two sregs
        int dreg = ins->dreg;
        int sreg1 = ins->sregs [0];
@@ -8043,8 +8097,9 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, int *local_ref_cou
                break;
 
 static InterpInst*
-interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, int *local_ref_count, InterpInst *ins)
+interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *local_defs, InterpInst *ins)
 {
+       int *local_ref_count = td->local_ref_count;
        // ins should be a conditional binop, therefore it should have only two sregs
        int sreg1 = ins->sregs [0];
        int sreg2 = ins->sregs [1];
@@ -8094,8 +8149,9 @@ interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue
 }
 
 static void
-cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, int *local_ref_count, LocalValue *local_defs)
+cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_defs)
 {
+       int *local_ref_count = td->local_ref_count;
        int sreg = *psreg;
 
        local_ref_count [sreg]++;
@@ -8126,6 +8182,7 @@ interp_cprop (TransformData *td)
        gboolean needs_retry;
        int ins_index;
 
+       td->local_ref_count = local_ref_count;
 retry:
        memset (local_ref_count, 0, td->locals_size * sizeof (int));
 
@@ -8167,12 +8224,12 @@ retry:
                                        int *call_args = ins->info.call_args;
                                        if (call_args) {
                                                while (*call_args != -1) {
-                                                       cprop_sreg (td, ins, call_args, local_ref_count, local_defs);
+                                                       cprop_sreg (td, ins, call_args, local_defs);
                                                        call_args++;
                                                }
                                        }
                                } else {
-                                       cprop_sreg (td, ins, &sregs [i], local_ref_count, local_defs);
+                                       cprop_sreg (td, ins, &sregs [i], local_defs);
                                        // This var is used as a source to a normal instruction. In case this var will
                                        // also be used as source to a call, make sure the offset allocator will create
                                        // a new temporary call arg var and not use this one. Call arg vars have special
@@ -8261,9 +8318,9 @@ retry:
                        } else if (MINT_IS_LDC_I4 (opcode)) {
                                local_defs [dreg].type = LOCAL_VALUE_I4;
                                local_defs [dreg].i = interp_get_const_from_ldc_i4 (ins);
-                       } else if (opcode == MINT_LDC_I8) {
+                       } else if (MINT_IS_LDC_I8 (opcode)) {
                                local_defs [dreg].type = LOCAL_VALUE_I8;
-                               local_defs [dreg].l = READ64 (&ins->data [0]);
+                               local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins);
                        } else if (ins->opcode == MINT_MONO_LDPTR) {
 #if SIZEOF_VOID_P == 8
                                local_defs [dreg].type = LOCAL_VALUE_I8;
@@ -8273,13 +8330,13 @@ retry:
                                local_defs [dreg].i = (gint32)td->data_items [ins->data [0]];
 #endif
                        } else if (MINT_IS_UNOP (opcode) || (opcode >= MINT_MOV_I1 && opcode <= MINT_MOV_U2)) {
-                               ins = interp_fold_unop (td, local_defs, local_ref_count, ins);
+                               ins = interp_fold_unop (td, local_defs, ins);
                        } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
-                               ins = interp_fold_unop_cond_br (td, bb, local_defs, local_ref_count, ins);
+                               ins = interp_fold_unop_cond_br (td, bb, local_defs, ins);
                        } else if (MINT_IS_BINOP (opcode)) {
-                               ins = interp_fold_binop (td, local_defs, local_ref_count, ins);
+                               ins = interp_fold_binop (td, local_defs, ins);
                        } else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
-                               ins = interp_fold_binop_cond_br (td, bb, local_defs, local_ref_count, ins);
+                               ins = interp_fold_binop_cond_br (td, bb, local_defs, ins);
                        } else if (MINT_IS_LDFLD (opcode) && ins->data [0] == 0) {
                                InterpInst *ldloca = local_defs [sregs [0]].ins;
                                if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S &&
@@ -8322,7 +8379,7 @@ retry:
                }
        }
 
-       needs_retry = interp_local_deadce (td, local_ref_count);
+       needs_retry = interp_local_deadce (td);
        if (mono_interp_opt & INTERP_OPT_BBLOCKS)
                needs_retry |= interp_optimize_bblocks (td);
 
@@ -8330,7 +8387,6 @@ retry:
                goto retry;
 
        g_free (local_defs);
-       g_free (local_ref_count);
 }
 
 void
@@ -8339,12 +8395,150 @@ mono_test_interp_cprop (TransformData *td)
        interp_cprop (td);
 }
 
+static gboolean
+get_sreg_imm (TransformData *td, int sreg, gint16 *imm) // Returns TRUE and stores the constant in *imm when sreg can be encoded as a 16-bit immediate.
+{
+       InterpInst *def = td->locals [sreg].def; // recorded defining instruction of sreg, NULL when none was recorded
+       if (def != NULL && td->local_ref_count [sreg] == 1) { // require a ref count of exactly one, so the def has no other user
+               gint64 ct;
+               if (MINT_IS_LDC_I4 (def->opcode))
+                       ct = interp_get_const_from_ldc_i4 (def);
+               else if (MINT_IS_LDC_I8 (def->opcode))
+                       ct = interp_get_const_from_ldc_i8 (def);
+               else
+                       return FALSE; // the value is not produced by a constant load
+               if (ct >= G_MININT16 && ct <= G_MAXINT16) { // the constant must be representable as gint16
+                       *imm = (gint16)ct;
+                       mono_interp_stats.super_instructions++; // NOTE(review): counted here, before the caller decides whether to use the immediate — confirm no caller rejects it afterwards
+                       return TRUE;
+               }
+       }
+       return FALSE; // *imm is left untouched on every FALSE path
+}
+
 static void
 interp_super_instructions (TransformData *td) // Walks every basic block and fuses a constant load with its single user into an immediate-operand opcode; also drops a cknull that directly feeds an ldfld.
 {
+       InterpBasicBlock *bb;
+       int *local_ref_count = td->local_ref_count; // reference counts stored on td; the fusions below decrement them
        // Add some actual super instructions
+       for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+               InterpInst *ins;
+
+               // Set cbb since we do some instruction inserting below
+               td->cbb = bb;
+
+               for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
+                       int opcode = ins->opcode;
+                       if (opcode == MINT_NOP)
+                               continue;
+                       if (mono_interp_op_dregs [opcode] && !(td->locals [ins->dreg].flags & INTERP_LOCAL_FLAG_GLOBAL))
+                               td->locals [ins->dreg].def = ins; // remember the defining instruction of non-global vars for the lookups below
+
+                       if (opcode == MINT_RET) {
+                               // ldc + ret -> ret.imm
+                               int sreg = ins->sregs [0];
+                               gint16 imm;
+                               if (get_sreg_imm (td, sreg, &imm)) {
+                                       InterpInst *def = td->locals [sreg].def;
+                                       int ret_op = MINT_IS_LDC_I4 (def->opcode) ? MINT_RET_I4_IMM : MINT_RET_I8_IMM; // the kind of ldc that produced the value selects the i4 or i8 return
+                                       InterpInst *new_inst = interp_insert_ins (td, ins, ret_op); // presumably placed next to ins — confirm against interp_insert_ins
+                                       new_inst->data [0] = imm;
+                                       interp_clear_ins (def);
+                                       interp_clear_ins (ins);
+                                       local_ref_count [sreg]--; // the single use of sreg was just removed
+
+                                       if (td->verbose_level) {
+                                               g_print ("superins: ");
+                                               dump_interp_inst (new_inst);
+                                       }
+                               }
+                       } else if (opcode == MINT_ADD_I4 || opcode == MINT_ADD_I8) {
+                               int sreg = -1; // the operand that stays a variable; -1 means no fusion was found
+                               int sreg_imm = -1; // the operand that is folded into the immediate
+                               gint16 imm;
+                               if (get_sreg_imm (td, ins->sregs [0], &imm)) { // either operand of the add may supply the constant
+                                       sreg = ins->sregs [1];
+                                       sreg_imm = ins->sregs [0];
+                               } else if (get_sreg_imm (td, ins->sregs [1], &imm)) {
+                                       sreg = ins->sregs [0];
+                                       sreg_imm = ins->sregs [1];
+                               }
+                               if (sreg != -1) {
+                                       int add_op = opcode == MINT_ADD_I4 ? MINT_ADD_I4_IMM : MINT_ADD_I8_IMM;
+                                       InterpInst *new_inst = interp_insert_ins (td, ins, add_op);
+                                       new_inst->dreg = ins->dreg;
+                                       new_inst->sregs [0] = sreg;
+                                       new_inst->data [0] = imm;
+                                       interp_clear_ins (td->locals [sreg_imm].def);
+                                       interp_clear_ins (ins);
+                                       local_ref_count [sreg_imm]--;
+                                       if (td->verbose_level) {
+                                               g_print ("superins: ");
+                                               dump_interp_inst (new_inst);
+                                       }
+                               }
+                       } else if (opcode == MINT_SUB_I4 || opcode == MINT_SUB_I8) {
+                               // ldc + sub -> add.-imm
+                               gint16 imm;
+                               int sreg_imm = ins->sregs [1]; // only the second operand is considered for the immediate
+                               if (get_sreg_imm (td, sreg_imm, &imm) && imm != G_MININT16) { // G_MININT16 is excluded because its negation does not fit in gint16
+                                       int add_op = opcode == MINT_SUB_I4 ? MINT_ADD_I4_IMM : MINT_ADD_I8_IMM;
+                                       InterpInst *new_inst = interp_insert_ins (td, ins, add_op);
+                                       new_inst->dreg = ins->dreg;
+                                       new_inst->sregs [0] = ins->sregs [0];
+                                       new_inst->data [0] = -imm; // the subtraction is rewritten as adding the negated constant
+                                       interp_clear_ins (td->locals [sreg_imm].def);
+                                       interp_clear_ins (ins);
+                                       local_ref_count [sreg_imm]--;
+                                       if (td->verbose_level) {
+                                               g_print ("superins: ");
+                                               dump_interp_inst (new_inst);
+                                       }
+                               }
+                       } else if (MINT_IS_BINOP_SHIFT (opcode)) {
+                               // ldc + sh -> sh.imm
+                               gint16 imm;
+                               int sreg_imm = ins->sregs [1]; // the shift count is the second operand
+                               if (get_sreg_imm (td, sreg_imm, &imm)) { // NOTE(review): imm is only checked to fit in gint16, not against the operand width — confirm the *.imm shift handlers tolerate negative or oversized counts
+                                       int shift_op = MINT_SHR_UN_I4_IMM + (opcode - MINT_SHR_UN_I4); // relies on the imm shift opcodes being declared in the same order as the plain shift opcodes
+                                       InterpInst *new_inst = interp_insert_ins (td, ins, shift_op);
+                                       new_inst->dreg = ins->dreg;
+                                       new_inst->sregs [0] = ins->sregs [0];
+                                       new_inst->data [0] = imm;
+                                       interp_clear_ins (td->locals [sreg_imm].def);
+                                       interp_clear_ins (ins);
+                                       local_ref_count [sreg_imm]--;
+                                       if (td->verbose_level) {
+                                               g_print ("superins: ");
+                                               dump_interp_inst (new_inst);
+                                       }
+                               }
+                       } else if (MINT_IS_LDFLD (opcode)) {
+                               // cknull + ldfld -> ldfld
+                               // FIXME This optimization is very limited, it is meant mainly to remove cknull
+                               // when inlining property accessors. We should have more advanced cknull removal
+                               // optimizations, so we can catch cases where instructions are not next to each other.
+                               int obj_sreg = ins->sregs [0];
+                               InterpInst *def = td->locals [obj_sreg].def;
+                               if (def != NULL && def->opcode == MINT_CKNULL && interp_prev_ins (ins) == def && // the cknull must be the instruction returned by interp_prev_ins for this ldfld
+                                               def->dreg == obj_sreg && local_ref_count [obj_sreg] == 1) { // and its result must have a ref count of exactly one
+                                       if (td->verbose_level) {
+                                               g_print ("remove redundant cknull (%s): ", td->method->name);
+                                               dump_interp_inst (def);
+                                       }
+                                       ins->sregs [0] = def->sregs [0]; // the ldfld now reads the object that the cknull was checking
+                                       interp_clear_ins (def);
+                                       local_ref_count [obj_sreg]--;
+                                       mono_interp_stats.super_instructions++;
+                               }
+                       }
+               }
+       }
 }
 
+static void initialize_global_vars (TransformData *td);
+
 static void
 interp_optimize_code (TransformData *td)
 {
@@ -8354,7 +8548,13 @@ interp_optimize_code (TransformData *td)
        if (mono_interp_opt & INTERP_OPT_CPROP)
                MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td));
 
-       if (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS)
+       // After this point optimizations on control flow can no longer happen, so we can determine
+       // which vars are global. This helps speed up the super instructions pass, which only operates on
+       // single def, single use local vars.
+       initialize_global_vars (td);
+
+       if ((mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) &&
+                       (mono_interp_opt & INTERP_OPT_CPROP))
                MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td));
 }
 
@@ -8986,6 +9186,7 @@ exit:
        g_free (td->data_items);
        g_free (td->stack);
        g_free (td->locals);
+       g_free (td->local_ref_count);
        g_hash_table_destroy (td->data_hash);
 #ifdef ENABLE_EXPERIMENT_TIERED
        g_hash_table_destroy (td->patchsite_hash);
index 0609d20..4d9aa92 100644 (file)
@@ -145,8 +145,12 @@ typedef struct {
        // index of first basic block where this var is used
        int bb_index;
        union {
-               // If var is INTERP_LOCAL_FLAG_CALL_ARGS, this is the call instruction using it
+               // If var is INTERP_LOCAL_FLAG_CALL_ARGS, this is the call instruction using it.
+               // Only used during var offset allocator
                InterpInst *call;
+               // For local vars, this represents the instruction declaring it.
+               // Only used during super instruction pass.
+               InterpInst *def;
        };
 } InterpLocal;
 
@@ -173,6 +177,7 @@ typedef struct
        gint32 param_area_offset;
        gint32 total_locals_size;
        InterpLocal *locals;
+       int *local_ref_count;
        unsigned int il_locals_offset;
        unsigned int il_locals_size;
        unsigned int locals_size;