From fb452c9e660868695e0c83e335d22557731983cb Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Wed, 23 Oct 2019 11:57:07 +0300 Subject: [PATCH] [interp] Add a new super instructions pass (mono/mono#17489) Together with a few very simple super instructions. We replace common instruction patterns to avoid indirection via interp stack, instruction dispatch overhead and to allow the compiler to generate better code for the instruction. Commit migrated from https://github.com/mono/mono/commit/479d2dff365a688cc0045b3c76aa7aba79d4784e --- src/mono/mono/mini/interp/interp-internals.h | 6 ++- src/mono/mono/mini/interp/interp.c | 64 ++++++++++++++++++++-- src/mono/mono/mini/interp/mintops.def | 27 ++++++++++ src/mono/mono/mini/interp/mintops.h | 3 ++ src/mono/mono/mini/interp/transform.c | 80 +++++++++++++++++++++++++++- 5 files changed, 175 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 7948e35..26b16a6 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -39,9 +39,11 @@ enum { }; enum { + INTERP_OPT_NONE = 0, INTERP_OPT_INLINE = 1, INTERP_OPT_CPROP = 2, - INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP + INTERP_OPT_SUPER_INSTRUCTIONS = 4, + INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP | INTERP_OPT_SUPER_INSTRUCTIONS }; #if SIZEOF_VOID_P == 4 @@ -197,12 +199,14 @@ typedef struct { typedef struct { gint64 transform_time; gint64 cprop_time; + gint64 super_instructions_time; gint32 stloc_nps; gint32 movlocs; gint32 copy_propagations; gint32 constant_folds; gint32 killed_instructions; gint32 emitted_instructions; + gint32 super_instructions; gint32 added_pop_count; gint32 inlined_methods; gint32 inline_failures; diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 0dd1a65..6b37efd 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c 
@@ -4226,6 +4226,14 @@ common_vcall: ++sp [-1].data.l; ++ip; MINT_IN_BREAK; + MINT_IN_CASE(MINT_LOCADD1_I4) + *(gint32*)(locals + ip [1]) += 1; + ip += 2; + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LOCADD1_I8) + *(gint64*)(locals + ip [1]) += 1; + ip += 2; + MINT_IN_BREAK; MINT_IN_CASE(MINT_SUB_I4) BINOP(i, -); MINT_IN_BREAK; @@ -4246,6 +4254,14 @@ common_vcall: --sp [-1].data.l; ++ip; MINT_IN_BREAK; + MINT_IN_CASE(MINT_LOCSUB1_I4) + *(gint32*)(locals + ip [1]) -= 1; + ip += 2; + MINT_IN_BREAK; + MINT_IN_CASE(MINT_LOCSUB1_I8) + *(gint64*)(locals + ip [1]) -= 1; + ip += 2; + MINT_IN_BREAK; MINT_IN_CASE(MINT_MUL_I4) BINOP(i, *); MINT_IN_BREAK; @@ -4967,6 +4982,43 @@ common_vcall: MINT_IN_BREAK; } + +#define LDARGFLD(datamem, fieldtype) do { \ + MonoObject *o = frame->stack_args [ip [1]].data.o; \ + NULL_CHECK (o); \ + sp [0].data.datamem = *(fieldtype *)((char *)o + ip [2]) ; \ + sp++; \ + ip += 3; \ +} while (0) + MINT_IN_CASE(MINT_LDARGFLD_I1) LDARGFLD(i, gint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_U1) LDARGFLD(i, guint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_I2) LDARGFLD(i, gint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_U2) LDARGFLD(i, guint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_I4) LDARGFLD(i, gint32); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_I8) LDARGFLD(l, gint64); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_R4) LDARGFLD(f_r4, float); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_R8) LDARGFLD(f, double); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_O) LDARGFLD(p, gpointer); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDARGFLD_P) LDARGFLD(p, gpointer); MINT_IN_BREAK; + +#define LDLOCFLD(datamem, fieldtype) do { \ + MonoObject *o = *(MonoObject**)(locals + ip [1]); \ + NULL_CHECK (o); \ + sp [0].data.datamem = * (fieldtype *)((char *)o + ip [2]) ; \ + sp++; \ + ip += 3; \ +} while (0) + MINT_IN_CASE(MINT_LDLOCFLD_I1) LDLOCFLD(i, gint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_U1) LDLOCFLD(i, guint8); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_I2) 
LDLOCFLD(i, gint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_U2) LDLOCFLD(i, guint16); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_I4) LDLOCFLD(i, gint32); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_I8) LDLOCFLD(l, gint64); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_R4) LDLOCFLD(f_r4, float); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_R8) LDLOCFLD(f, double); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_O) LDLOCFLD(p, gpointer); MINT_IN_BREAK; + MINT_IN_CASE(MINT_LDLOCFLD_P) LDLOCFLD(p, gpointer); MINT_IN_BREAK; + #define STFLD_UNALIGNED(datamem, fieldtype, unaligned) do { \ MonoObject* const o = sp [-2].data.o; \ NULL_CHECK (o); \ @@ -6633,12 +6685,16 @@ interp_parse_options (const char *options) if (strncmp (arg, "jit=", 4) == 0) mono_interp_jit_classes = g_slist_prepend (mono_interp_jit_classes, arg + 4); - if (strncmp (arg, "interp-only=", strlen ("interp-only=")) == 0) + else if (strncmp (arg, "interp-only=", strlen ("interp-only=")) == 0) mono_interp_only_classes = g_slist_prepend (mono_interp_only_classes, arg + strlen ("interp-only=")); - if (strncmp (arg, "-inline", 7) == 0) + else if (strncmp (arg, "-inline", 7) == 0) mono_interp_opt &= ~INTERP_OPT_INLINE; - if (strncmp (arg, "-cprop", 6) == 0) + else if (strncmp (arg, "-cprop", 6) == 0) mono_interp_opt &= ~INTERP_OPT_CPROP; + else if (strncmp (arg, "-super", 6) == 0) + mono_interp_opt &= ~INTERP_OPT_SUPER_INSTRUCTIONS; + else if (strncmp (arg, "-all", 4) == 0) + mono_interp_opt = INTERP_OPT_NONE; } } @@ -6968,11 +7024,13 @@ register_interp_stats (void) mono_counters_init (); mono_counters_register ("Total transform time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.transform_time); mono_counters_register ("Total cprop time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.cprop_time); + mono_counters_register ("Total super instructions time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, 
&mono_interp_stats.super_instructions_time); mono_counters_register ("STLOC_NP count", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.stloc_nps); mono_counters_register ("MOVLOC count", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.movlocs); mono_counters_register ("Copy propagations", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.copy_propagations); mono_counters_register ("Added pop count", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.added_pop_count); mono_counters_register ("Constant folds", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.constant_folds); + mono_counters_register ("Super instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.super_instructions); mono_counters_register ("Killed instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.killed_instructions); mono_counters_register ("Emitted instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.emitted_instructions); mono_counters_register ("Methods inlined", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.inlined_methods); diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index f96aa2d..81a01d8 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -94,6 +94,28 @@ OPDEF(MINT_LDRMFLD_VT, "ldrmfld.vt", 2, Pop1, Push1, MintOpUShortInt) OPDEF(MINT_LDFLDA, "ldflda", 2, Pop1, Push1, MintOpUShortInt) OPDEF(MINT_LDFLDA_UNSAFE, "ldflda.unsafe", 2, Pop1, Push1, MintOpUShortInt) +OPDEF(MINT_LDARGFLD_I1, "ldargfld.i1", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_U1, "ldargfld.u1", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_I2, "ldargfld.i2", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_U2, "ldargfld.u2", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_I4, "ldargfld.i4", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_I8, "ldargfld.i8", 3, Pop0, Push1, 
MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_R4, "ldargfld.r4", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_R8, "ldargfld.r8", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_O, "ldargfld.o", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDARGFLD_P, "ldargfld.p", 3, Pop0, Push1, MintOpTwoShorts) + +OPDEF(MINT_LDLOCFLD_I1, "ldlocfld.i1", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_U1, "ldlocfld.u1", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_I2, "ldlocfld.i2", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_U2, "ldlocfld.u2", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_I4, "ldlocfld.i4", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_I8, "ldlocfld.i8", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_R4, "ldlocfld.r4", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_R8, "ldlocfld.r8", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_O, "ldlocfld.o", 3, Pop0, Push1, MintOpTwoShorts) +OPDEF(MINT_LDLOCFLD_P, "ldlocfld.p", 3, Pop0, Push1, MintOpTwoShorts) + OPDEF(MINT_STFLD_I1, "stfld.i1", 2, Pop2, Push0, MintOpUShortInt) OPDEF(MINT_STFLD_U1, "stfld.u1", 2, Pop2, Push0, MintOpUShortInt) OPDEF(MINT_STFLD_I2, "stfld.i2", 2, Pop2, Push0, MintOpUShortInt) @@ -524,6 +546,11 @@ OPDEF(MINT_CLT_UN_R4, "clt.un.r4", 1, Pop2, Push1, MintOpNoArgs) OPDEF(MINT_CLT_UN_R8, "clt.un.r8", 1, Pop2, Push1, MintOpNoArgs) /* binops end */ +OPDEF(MINT_LOCADD1_I4, "locadd1.i4", 2, Pop0, Push0, MintOpUShortInt) +OPDEF(MINT_LOCADD1_I8, "locadd1.i8", 2, Pop0, Push0, MintOpUShortInt) +OPDEF(MINT_LOCSUB1_I4, "locsub1.i4", 2, Pop0, Push0, MintOpUShortInt) +OPDEF(MINT_LOCSUB1_I8, "locsub1.i8", 2, Pop0, Push0, MintOpUShortInt) + /* unops */ OPDEF(MINT_ADD1_I4, "add1.i4", 1, Pop1, Push1, MintOpNoArgs) OPDEF(MINT_ADD1_I8, "add1.i8", 1, Pop1, Push1, MintOpNoArgs) diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index 752fb5a..0afec36 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ 
b/src/mono/mono/mini/interp/mintops.h @@ -64,6 +64,9 @@ typedef enum { #define MINT_IS_LDC_I4(op) ((op) >= MINT_LDC_I4_M1 && (op) <= MINT_LDC_I4) #define MINT_IS_UNOP(op) ((op) >= MINT_ADD1_I4 && (op) <= MINT_CEQ0_I4) #define MINT_IS_BINOP(op) ((op) >= MINT_ADD_I4 && (op) <= MINT_CLT_UN_R8) +#define MINT_IS_LDLOCFLD(op) ((op) >= MINT_LDLOCFLD_I1 && (op) <= MINT_LDLOCFLD_P) +#define MINT_IS_LOCUNOP(op) ((op) >= MINT_LOCADD1_I4 && (op) <= MINT_LOCSUB1_I8) + #define MINT_POP_ALL -2 #define MINT_VAR_PUSH -1 diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 48f18c9..f767e09 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -6396,7 +6396,8 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in cbb->seq_points = g_slist_prepend_mempool (td->mempool, cbb->seq_points, seqp); cbb->last_seq_point = seqp; } else { - if (MINT_IS_LDLOC (opcode) || MINT_IS_STLOC (opcode) || MINT_IS_STLOC_NP (opcode) || opcode == MINT_LDLOCA_S) { + if (MINT_IS_LDLOC (opcode) || MINT_IS_STLOC (opcode) || MINT_IS_STLOC_NP (opcode) || opcode == MINT_LDLOCA_S || + MINT_IS_LDLOCFLD (opcode) || MINT_IS_LOCUNOP (opcode)) { ins->data [0] = get_interp_local_offset (td, ins->data [0]); } else if (MINT_IS_MOVLOC (opcode)) { ins->data [0] = get_interp_local_offset (td, ins->data [0]); @@ -7088,10 +7089,87 @@ retry: } static void +interp_super_instructions (TransformData *td) +{ + InterpInst *ins; + InterpInst *prev1_ins = NULL; + InterpInst *prev2_ins = NULL; + int last_il_offset = -1; + for (ins = td->first_ins; ins != NULL; ins = ins->next) { + int il_offset = ins->il_offset; + // If two instructions have the same il_offset, then the second one + // cannot be the start of a basic block. 
+ gboolean is_bb_start = il_offset != -1 && td->is_bb_start [il_offset] && il_offset != last_il_offset; + last_il_offset = il_offset; + if (ins->opcode == MINT_NOP) + continue; + if (is_bb_start) { + // Prevent optimizations spanning multiple basic blocks + prev2_ins = NULL; + prev1_ins = NULL; + } + if (ins->opcode >= MINT_LDFLD_I1 && ins->opcode <= MINT_LDFLD_P && prev1_ins) { + if (prev1_ins->opcode == MINT_LDLOC_O) { + int loc_index = prev1_ins->data [0]; + int fld_offset = ins->data [0]; + int mt = ins->opcode - MINT_LDFLD_I1; + ins = interp_insert_ins (td, ins, MINT_LDLOCFLD_I1 + mt); + ins->data [0] = loc_index; + ins->data [1] = fld_offset; + interp_clear_ins (td, ins->prev); + interp_clear_ins (td, prev1_ins); + prev1_ins = NULL; + mono_interp_stats.super_instructions++; + mono_interp_stats.killed_instructions++; + } else if (prev1_ins->opcode == MINT_LDARG_O || prev1_ins->opcode == MINT_LDARG_P0) { + int arg_index = 0; + int fld_offset = ins->data [0]; + int mt = ins->opcode - MINT_LDFLD_I1; + if (prev1_ins->opcode == MINT_LDARG_O) + arg_index = prev1_ins->data [0]; + ins = interp_insert_ins (td, ins, MINT_LDARGFLD_I1 + mt); + ins->data [0] = arg_index; + ins->data [1] = fld_offset; + interp_clear_ins (td, ins->prev); + interp_clear_ins (td, prev1_ins); + prev1_ins = NULL; + mono_interp_stats.super_instructions++; + mono_interp_stats.killed_instructions++; + } + } else if (MINT_IS_STLOC (ins->opcode) && prev1_ins && prev2_ins) { + if (prev1_ins->opcode == MINT_ADD1_I4 || prev1_ins->opcode == MINT_ADD1_I8 || + prev1_ins->opcode == MINT_SUB1_I4 || prev1_ins->opcode == MINT_SUB1_I8) { + if (MINT_IS_LDLOC (prev2_ins->opcode) && prev2_ins->data [0] == ins->data [0]) { + if (prev1_ins->opcode == MINT_ADD1_I4) + ins->opcode = MINT_LOCADD1_I4; + else if (prev1_ins->opcode == MINT_ADD1_I8) + ins->opcode = MINT_LOCADD1_I8; + else if (prev1_ins->opcode == MINT_SUB1_I4) + ins->opcode = MINT_LOCSUB1_I4; + else + ins->opcode = MINT_LOCSUB1_I8; + // the local index is 
already set inside the replaced STLOC instruction + interp_clear_ins (td, prev1_ins); + interp_clear_ins (td, prev2_ins); + prev1_ins = NULL; + mono_interp_stats.super_instructions++; + mono_interp_stats.killed_instructions += 2; + } + } + } + prev2_ins = prev1_ins; + prev1_ins = ins; + } +} + +static void interp_optimize_code (TransformData *td) { if (mono_interp_opt & INTERP_OPT_CPROP) MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td)); + + if (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) + MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td)); } static void -- 2.7.4