From efc473a3fa0c5dc5023fc7ffba1de7d066669d24 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 4 Oct 2019 14:20:52 +0300 Subject: [PATCH] [interp] Improve copy propagation (mono/mono#17154) * [interp] Refactor tracking of stack/local values Previously we had just an InterpInst* inside StackContentInfo, which was representing the instruction that pushed a certain local/value on the stack. This makes many things awkward, since an instruction is logically different from a value, that a local or a stack slot have. If we clear an instruction, it doesn't necessarily mean that the value that the instruction produced can't be stored on stack or in a local. This commit creates a new structure StackValue, which holds the value. * [interp] Generalize contents of StackValue StackValue contains an opcode and some data to enable reconstruction of the value. For example instead of doing a LDLOC for a local, we can see if the local has a known value and use it instead (this could be a LDLOC from another local, whose value was propagated, or in the future a LDC). This will make more sense when we also start to track constant values. Also decouple MOVLOC instructions from the cprop pass. They serve no purpose there. They are useful though when we do deadce, since we currently don't know how to kill instructions that change the stack. * [interp] Handle dup opcode during copy propagation * [interp] Avoid losing track of stack values for some opcodes For some opcodes, that access stack values below the top of the stack, we were assuming they pop everything and then push the stack back, in order to prevent optimizing away instructions that pushed some of these values, since the original instruction expects those values to reside on the stack. We handle these instruction separately and keep track of the values of the stack, so we can further propagate the stack values, even though we currently can't optimize away those instructions. * [interp] Propagate values for ctor arguments * [interp] MINT_CASTCLASS no longer clobbers top of stack Commit migrated from https://github.com/mono/mono/commit/d1b3ee2e3234657044bc5d0f4a9facf0fa06599c --- src/mono/mono/mini/interp/mintops.def | 16 +-- src/mono/mono/mini/interp/transform.c | 177 ++++++++++++++++++++++------------ 2 files changed, 124 insertions(+), 69 deletions(-) diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 0dcf893..9a95e56 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -363,16 +363,16 @@ OPDEF(MINT_NEWOBJ_VT_FAST, "newobj_vt_fast", 3, VarPop, Push1, MintOpMethodToken OPDEF(MINT_NEWOBJ_VTST_FAST, "newobj_vtst_fast", 4, VarPop, Push1, MintOpMethodToken) OPDEF(MINT_NEWOBJ_MAGIC, "newobj_magic", 2, Pop0, Push0, MintOpMethodToken) OPDEF(MINT_INITOBJ, "initobj", 3, Pop1, Push0, MintOpInt) -OPDEF(MINT_CASTCLASS, "castclass", 2, Pop1, Push1, MintOpClassToken) +OPDEF(MINT_CASTCLASS, "castclass", 2, Pop0, Push0, MintOpClassToken) OPDEF(MINT_ISINST, "isinst", 2, Pop1, Push1, MintOpClassToken) -OPDEF(MINT_CASTCLASS_INTERFACE, "castclass.interface", 2, Pop1, Push1, MintOpClassToken) +OPDEF(MINT_CASTCLASS_INTERFACE, "castclass.interface", 2, Pop0, Push0, MintOpClassToken) OPDEF(MINT_ISINST_INTERFACE, "isinst.interface", 2, Pop1, Push1, MintOpClassToken) -OPDEF(MINT_CASTCLASS_COMMON, "castclass.common", 2, Pop1, Push1, MintOpClassToken) +OPDEF(MINT_CASTCLASS_COMMON, "castclass.common", 2, Pop0, Push0, MintOpClassToken) OPDEF(MINT_ISINST_COMMON, "isinst.common", 2, Pop1, Push1, MintOpClassToken) OPDEF(MINT_NEWARR, "newarr", 2, Pop1, Push1, MintOpClassToken) -OPDEF(MINT_BOX, "box", 3, VarPop, VarPush, MintOpTwoShorts) -OPDEF(MINT_BOX_VT, "box.vt", 3, VarPop, VarPush, MintOpTwoShorts) -OPDEF(MINT_BOX_NULLABLE, "box.nullable", 3, VarPop, VarPush, MintOpTwoShorts) +OPDEF(MINT_BOX, "box", 3, Pop0, Push0, MintOpTwoShorts) +OPDEF(MINT_BOX_VT, "box.vt", 3, Pop0, Push0, MintOpTwoShorts) +OPDEF(MINT_BOX_NULLABLE, "box.nullable", 3, Pop0, Push0, MintOpTwoShorts) OPDEF(MINT_UNBOX, "unbox", 2, Pop1, Push1, MintOpClassToken) OPDEF(MINT_LDTOKEN, "ldtoken", 2, Pop0, Push1, MintOpClassToken) /* not really */ OPDEF(MINT_LDFTN, "ldftn", 2, Pop0, Push1, MintOpMethodToken) @@ -655,7 +655,7 @@ OPDEF(MINT_MKREFANY, "mkrefany", 2, Pop1, Push1, MintOpClassToken) OPDEF(MINT_REFANYTYPE, "refanytype", 1, Pop1, Push1, MintOpNoArgs) OPDEF(MINT_REFANYVAL, "refanyval", 2, Pop1, Push1, MintOpNoArgs) -OPDEF(MINT_CKNULL_N, "cknull_n", 2, VarPop, VarPush, MintOpUShortInt) +OPDEF(MINT_CKNULL_N, "cknull_n", 2, Pop0, Push0, MintOpUShortInt) OPDEF(MINT_GETCHR, "getchr", 1, Pop2, Push1, MintOpNoArgs) OPDEF(MINT_STRLEN, "strlen", 1, Pop1, Push1, MintOpNoArgs) @@ -689,7 +689,7 @@ OPDEF(MINT_SDB_INTR_LOC, "sdb_intr_loc", 1, Pop0, Push0, MintOpNoArgs) OPDEF(MINT_SDB_SEQ_POINT, "sdb_seq_point", 1, Pop0, Push0, MintOpNoArgs) OPDEF(MINT_SDB_BREAKPOINT, "sdb_breakpoint", 1, Pop0, Push0, MintOpNoArgs) OPDEF(MINT_LD_DELEGATE_METHOD_PTR, "ld_delegate_method_ptr", 1, Pop1, Push1, MintOpNoArgs) -OPDEF(MINT_LD_DELEGATE_INVOKE_IMPL, "ld_delegate_invoke_impl", 2, VarPop, VarPush, MintOpNoArgs) +OPDEF(MINT_LD_DELEGATE_INVOKE_IMPL, "ld_delegate_invoke_impl", 2, Pop0, Push1, MintOpNoArgs) OPDEF(MINT_START_ABORT_PROT, "start_abort_protected", 1, Pop0, Push0, MintOpNoArgs) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 20158df..3b28402 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -51,8 +51,22 @@ typedef struct unsigned char flags; } StackInfo; +// StackValue contains data to construct an InterpInst that is equivalent with the contents +// of the stack slot / local. +typedef struct { + // Indicates the type of the stored information. It could be a LDLOC or a LDC opcode + int opcode; + // If opcode is a LDLOC, this is the local_index + int data; +} StackValue; + typedef struct { + // This indicates what is currently stored in this stack slot. This can be a constant + // or the copy of a local. + StackValue val; + // The instruction that pushed this stack slot. If ins is null, we can't remove the usage + // of the stack slot, because we can't clear the instruction that set it. InterpInst *ins; } StackContentInfo; @@ -6083,11 +6097,10 @@ get_inst_stack_usage (TransformData *td, InterpInst *ins, int *pop, int *push) int param_count = ins->data [1]; gboolean is_inlined = ins->data [0] == 0xffff; if (is_inlined) { - // FIXME - // We lose track of the contents of the stack because the newobj references are pushed below - // the ctor arguments. We should keep track of stack contents to enable ctor optimization. - *pop = param_count; - *push = param_count + 2; + // This needs to be handled explictly during cprop, in order to properly + // keep track of stack contents + *pop = 0; + *push = 2; } else { *pop = param_count; *push = 1; @@ -6111,20 +6124,6 @@ get_inst_stack_usage (TransformData *td, InterpInst *ins, int *pop, int *push) *push = 1; break; } - case MINT_BOX: - case MINT_BOX_VT: - case MINT_BOX_NULLABLE: - *pop = (ins->data [1] & ~BOX_NOT_CLEAR_VT_SP) + 1; - *push = *pop; - break; - case MINT_CKNULL_N: - *pop = ins->data [0]; - *push = ins->data [0]; - break; - case MINT_LD_DELEGATE_INVOKE_IMPL: - *pop = ins->data [0]; - *push = ins->data [0] + 1; - break; case MINT_INTRINS_BYREFERENCE_CTOR: { InterpMethod *imethod = (InterpMethod*) td->data_items [ins->data [0]]; *pop = imethod->param_count; @@ -6318,10 +6317,10 @@ clear_stack_content_info_for_local (StackContentInfo *start, StackContentInfo *e { StackContentInfo *si; for (si = start; si < end; si++) { - if (si->ins) { - g_assert (MINT_IS_LDLOC (si->ins->opcode)); - if (si->ins->data [0] == local) - si->ins = NULL; + if (si->val.opcode != MINT_NOP) { + g_assert (MINT_IS_LDLOC (si->val.opcode)); + if (si->val.data == local) + si->val.opcode = MINT_NOP; } } } @@ -6329,15 +6328,14 @@ clear_stack_content_info_for_local (StackContentInfo *start, StackContentInfo *e // The value of local has changed. This means we can no longer assume that any other local // is a copy of this local. static void -clear_local_content_info_for_local (StackContentInfo *start, StackContentInfo *end, int local) +clear_local_content_info_for_local (StackValue *start, StackValue *end, int local) { - StackContentInfo *si; - for (si = start; si < end; si++) { - if (si->ins) { - g_assert (MINT_IS_MOVLOC (si->ins->opcode)); - g_assert (si->ins->data [1] == (guint16)(si - start)); - if (si->ins->data [0] == local) - si->ins = NULL; + StackValue *sval; + for (sval = start; sval < end; sval++) { + if (sval->opcode != MINT_NOP) { + g_assert (MINT_IS_LDLOC (sval->opcode)); + if (sval->data == local) + sval->opcode = MINT_NOP; } } } @@ -6384,7 +6382,7 @@ interp_cprop (TransformData *td) StackContentInfo *stack = (StackContentInfo*) g_malloc (td->max_stack_height * sizeof (StackContentInfo)); StackContentInfo *stack_end = stack + td->max_stack_height; StackContentInfo *sp = stack; - StackContentInfo *locals = (StackContentInfo*) g_malloc (td->locals_size * sizeof (StackContentInfo)); + StackValue *locals = (StackValue*) g_malloc (td->locals_size * sizeof (StackValue)); int *local_ref_count = (int*) g_malloc0 (td->locals_size * sizeof (int)); InterpInst *ins; int last_il_offset = -1; @@ -6399,13 +6397,15 @@ interp_cprop (TransformData *td) if (is_bb_start) { if (td->stack_height [il_offset] >= 0) { sp = stack + td->stack_height [il_offset]; - g_assert (sp >= stack); + g_assert (sp <= stack_end); memset (stack, 0, (sp - stack) * sizeof (StackContentInfo)); } - memset (locals, 0, td->locals_size * sizeof (StackContentInfo)); + memset (locals, 0, td->locals_size * sizeof (StackValue)); } // The instruction pops some values then pushes some other get_inst_stack_usage (td, ins, &pop, &push); + if (td->verbose_level) + g_print ("IL_%x, sp %d, %s (pop %d, push %d)\n", ins->il_offset, sp - stack, mono_interp_opname (ins->opcode), pop, push); if (MINT_IS_LDLOC (ins->opcode)) { int replace_op = 0; int loaded_local = ins->data [0]; @@ -6420,62 +6420,118 @@ interp_cprop (TransformData *td) if (replace_op) { if (td->verbose_level) g_print ("Add stloc.np : ldloc (off %p), stloc (off %p)\n", ins->il_offset, ins->prev->il_offset); + // We know what local is on the stack now. Track it + sp->ins = NULL; + sp->val.opcode = ins->opcode; + sp->val.data = loaded_local; + + // Clear the previous stloc instruction interp_clear_ins (td, ins->prev); ins->opcode = replace_op; mono_interp_stats.stloc_nps++; local_ref_count [loaded_local]--; - // FIXME We know what local is on the stack now. Track it } } - } else if (locals [loaded_local].ins != NULL && !(td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) { - g_assert (MINT_IS_MOVLOC (locals [loaded_local].ins->opcode)); + } else if (locals [loaded_local].opcode != MINT_NOP && !(td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT)) { + g_assert (MINT_IS_LDLOC (locals [loaded_local].opcode)); // do copy propagation of the original source if (td->verbose_level) - g_print ("cprop %d -> %d\n", loaded_local, locals [loaded_local].ins->data [0]); + g_print ("cprop %d -> %d\n", loaded_local, locals [loaded_local].data); mono_interp_stats.copy_propagations++; local_ref_count [loaded_local]--; - ins->data [0] = locals [loaded_local].ins->data [0]; + ins->data [0] = locals [loaded_local].data; local_ref_count [ins->data [0]]++; } if (!replace_op) { // Save the ldloc on the stack if it wasn't optimized away // For simplicity we don't track locals that have their address taken // since it is hard to detect instructions that change the local value. - if (td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT) - sp->ins = NULL; - else + if (td->locals [loaded_local].flags & INTERP_LOCAL_FLAG_INDIRECT) { + sp->val.opcode = MINT_NOP; + } else { sp->ins = ins; + sp->val.opcode = ins->opcode; + sp->val.data = ins->data [0]; + } } sp++; } else if (MINT_IS_STLOC (ins->opcode)) { int dest_local = ins->data [0]; sp--; - if (sp->ins != NULL) { - int mt = sp->ins->opcode - MINT_LDLOC_I1; + if (sp->val.opcode != MINT_NOP) { + g_assert (MINT_IS_LDLOC (sp->val.opcode)); + int mt = sp->val.opcode - MINT_LDLOC_I1; if (ins->opcode - MINT_STLOC_I1 == mt) { - // Same local, same type of load and store, convert to movloc - if (td->verbose_level) - g_print ("Add movloc : ldloc (off %p), stloc (off %p)\n", sp->ins->il_offset, ins->il_offset); - int src_local = sp->ins->data [0]; - interp_clear_ins (td, sp->ins); - interp_clear_ins (td, ins); - - ins = interp_insert_ins (td, ins, get_movloc_for_type (mt)); - ins->data [0] = src_local; - ins->data [1] = dest_local; - if (ins->opcode == MINT_MOVLOC_VT) - ins->data [2] = sp->ins->data [1]; - mono_interp_stats.movlocs++; + // Same local, same type of load and store. Propagate value + int src_local = sp->val.data; + int vtsize = (ins->opcode == MINT_STLOC_VT) ? ins->data [1] : 0; + // Track what exactly is stored into local - locals [dest_local].ins = ins; + locals [dest_local].opcode = sp->val.opcode; + locals [dest_local].data = src_local; + + if (sp->ins) { + // Clear ldloc / stloc pair and replace it with movloc superinstruction + if (td->verbose_level) + g_print ("Add movloc : ldloc (off %p), stloc (off %p)\n", sp->ins->il_offset, ins->il_offset); + interp_clear_ins (td, sp->ins); + interp_clear_ins (td, ins); + + ins = interp_insert_ins (td, ins, get_movloc_for_type (mt)); + ins->data [0] = src_local; + ins->data [1] = dest_local; + if (vtsize) + ins->data [2] = vtsize; + mono_interp_stats.movlocs++; + } } else { - locals [dest_local].ins = NULL; + locals [dest_local].opcode = MINT_NOP; } } else { - locals [dest_local].ins = NULL; + locals [dest_local].opcode = MINT_NOP; } clear_stack_content_info_for_local (stack, sp, dest_local); clear_local_content_info_for_local (locals, locals + td->locals_size, dest_local); + } else if (ins->opcode == MINT_DUP || ins->opcode == MINT_DUP_VT) { + sp [0].val.opcode = sp [-1].val.opcode; + sp [0].val.data = sp [-1].val.data; + sp [0].ins = ins; + // If top of stack is known, we could also replace dup with an explicit + // propagated instruction, so we remove the top of stack dependency + sp [-1].ins = NULL; + sp++; + } else if (ins->opcode >= MINT_BOX && ins->opcode <= MINT_BOX_NULLABLE) { + int offset = (ins->data [1] & ~BOX_NOT_CLEAR_VT_SP); + // Clear the stack slot that is boxed + memset (&sp [-1 - offset], 0, sizeof (StackContentInfo)); + // Make sure that the instructions that pushed this stack slot can't be + // optimized away. If we would optimize them away, we would also need to + // update the offset in the box instruction, which we can't, for now. + for (int i = 1; i <= offset; i++) + sp [-i].ins = NULL; + } else if (ins->opcode == MINT_CKNULL_N) { + int offset = ins->data [0]; + for (int i = 1; i <= offset; i++) + sp [-i].ins = NULL; + } else if (ins->opcode == MINT_LD_DELEGATE_INVOKE_IMPL) { + int offset = ins->data [0]; + for (int i = 1; i <= offset; i++) + sp [-i].ins = NULL; + memset (sp, 0, sizeof (StackContentInfo)); + sp++; + } else if (ins->opcode == MINT_NEWOBJ_FAST && ins->data [0] == 0xffff) { + int param_count = ins->data [1]; + // memmove the stack values while clearing ins, to prevent instruction removal + for (int i = 1; i <= param_count; i++) { + sp [-i + 2] = sp [-i]; + sp [-i + 2].ins = NULL; + } + // clear stack information for the slots where the allocated object resides + memset (&sp [-param_count], 0, 2 * sizeof (StackContentInfo)); + sp += 2; + } else if (ins->opcode == MINT_CASTCLASS || ins->opcode == MINT_CASTCLASS_COMMON || ins->opcode == MINT_CASTCLASS_INTERFACE) { + // Keep the value on the stack, but prevent optimizing away + sp [-1].ins = NULL; } else { if (pop == MINT_POP_ALL) pop = sp - stack; @@ -6484,7 +6540,6 @@ interp_cprop (TransformData *td) g_assert ((sp - push) >= stack && (sp - push) <= stack_end); memset (sp - push, 0, push * sizeof (StackContentInfo)); } - // TODO handle dup last_il_offset = ins->il_offset; } -- 2.7.4