From 73b9148f092704e80273ab316cdf2300ee9b5533 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Thu, 12 May 2011 11:18:30 -0700 Subject: [PATCH] x86: Add branch relaxation --- orc/orcprogram-sse.c | 21 +++++---- orc/orcprogram.h | 1 + orc/orcx86.c | 21 +++------ orc/orcx86.h | 11 +++-- orc/orcx86insn.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++--- orc/orcx86insn.h | 6 ++- 6 files changed, 146 insertions(+), 36 deletions(-) diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index ab4bf3a..d565850 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -694,7 +694,7 @@ orc_emit_split_3_regions (OrcCompiler *compiler) orc_x86_emit_jmp (compiler, 7); /* else, iterations are all unaligned: n1=n, n2=0, n3=0 */ - orc_x86_emit_label (compiler, 6, 0); + orc_x86_emit_label (compiler, 6); orc_x86_emit_mov_memoffset_reg (compiler, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, X86_EAX); @@ -706,7 +706,7 @@ orc_emit_split_3_regions (OrcCompiler *compiler) orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); - orc_x86_emit_label (compiler, 7, 0); + orc_x86_emit_label (compiler, 7); } static void @@ -787,6 +787,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) compiler->asm_code = NULL; compiler->asm_code_len = 0; memset (compiler->labels, 0, sizeof (compiler->labels)); + memset (compiler->labels_int, 0, sizeof (compiler->labels_int)); compiler->n_fixups = 0; compiler->n_output_insns = 0; } @@ -822,7 +823,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) compiler->exec_reg); } - orc_x86_emit_label (compiler, LABEL_OUTER_LOOP, 0); + orc_x86_emit_label (compiler, LABEL_OUTER_LOOP); } if (compiler->program->constant_n > 0 && @@ -901,14 +902,14 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); orc_x86_emit_je (compiler, LABEL_STEP_UP(compiler->loop_shift)); orc_sse_emit_loop (compiler, 0, 
1<<compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift), 0); + orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift)); } compiler->loop_shift = save_loop_shift; compiler->vars[align_var].is_aligned = TRUE; } - orc_x86_emit_label (compiler, LABEL_REGION1_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_REGION1_SKIP); orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); @@ -921,7 +922,8 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) } ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START, 4); + orc_x86_emit_align (compiler, 4); + orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START); ui_max = 1<<compiler->unroll_shift; for(ui=0;ui<ui_max;ui++) { compiler->offset = ui<<compiler->loop_shift; @@ -937,7 +939,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) compiler->exec_reg); } orc_x86_emit_jne (compiler, LABEL_INNER_LOOP_START); - orc_x86_emit_label (compiler, LABEL_REGION2_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_REGION2_SKIP); if (emit_region3) { int save_loop_shift; @@ -954,7 +956,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); orc_x86_emit_je (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); orc_sse_emit_loop (compiler, 0, 1<<compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift), 0); + orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); } compiler->loop_shift = save_loop_shift; @@ -968,7 +970,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]), compiler->exec_reg); orc_x86_emit_jne (compiler, LABEL_OUTER_LOOP); - orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP); } sse_save_accumulators (compiler); @@ -982,6 +984,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) #endif 
orc_x86_emit_epilogue (compiler); + orc_x86_calculate_offsets (compiler); orc_x86_output_insns (compiler); orc_x86_do_fixups (compiler); diff --git a/orc/orcprogram.h b/orc/orcprogram.h index e237c3e..e3ff976 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -443,6 +443,7 @@ struct _OrcCompiler { OrcFixup fixups[ORC_N_FIXUPS]; int n_fixups; unsigned char *labels[ORC_N_LABELS]; + int labels_int[ORC_N_LABELS]; int n_labels; int error; diff --git a/orc/orcx86.c b/orc/orcx86.c index c003165..4c7cee7 100644 --- a/orc/orcx86.c +++ b/orc/orcx86.c @@ -412,6 +412,12 @@ x86_add_label (OrcCompiler *compiler, unsigned char *ptr, int label) } void +x86_add_label2 (OrcCompiler *compiler, int index, int label) +{ + compiler->labels_int[label] = index; +} + +void orc_x86_do_fixups (OrcCompiler *compiler) { int i; @@ -423,7 +429,7 @@ orc_x86_do_fixups (OrcCompiler *compiler) diff = ((orc_int8)ptr[0]) + (label - ptr); if (diff != (orc_int8)diff) { - ORC_COMPILER_ERROR(compiler, "short jump too long"); + ORC_COMPILER_ERROR(compiler, "short jump too long %d", diff); } ptr[0] = diff; @@ -508,19 +514,6 @@ orc_x86_emit_epilogue (OrcCompiler *compiler) orc_x86_emit_ret (compiler); } -void -orc_x86_emit_align (OrcCompiler *compiler) -{ - int diff; - int align_shift = 4; - - diff = (compiler->code - compiler->codeptr)&((1<opcode->type == ORC_X86_INSN_TYPE_LABEL) { + if (xinsn->opcode->type == ORC_X86_INSN_TYPE_ALIGN) { if (xinsn->size > 0) ORC_ASM_CODE(p,".p2align %d\n", xinsn->size); + return; + } + if (xinsn->opcode->type == ORC_X86_INSN_TYPE_LABEL) { ORC_ASM_CODE(p,"%d:\n", xinsn->label); return; } @@ -295,6 +299,7 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_REG16_REGM: case ORC_X86_INSN_TYPE_BRANCH: case ORC_X86_INSN_TYPE_LABEL: + case ORC_X86_INSN_TYPE_ALIGN: case ORC_X86_INSN_TYPE_NONE: imm_str[0] = 0; break; @@ -363,6 +368,7 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_BRANCH: case 
ORC_X86_INSN_TYPE_NONE: case ORC_X86_INSN_TYPE_LABEL: + case ORC_X86_INSN_TYPE_ALIGN: case ORC_X86_INSN_TYPE_IMM32_A: op1_str[0] = 0; break; @@ -446,9 +452,11 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) break; case ORC_X86_INSN_TYPE_BRANCH: sprintf (op2_str, "%d%c", xinsn->label, - (p->labels[xinsn->label]!=NULL) ? 'b' : 'f'); + (p->labels_int[xinsn->label] < + xinsn - ((OrcX86Insn *)p->output_insns)) ? 'b' : 'f'); break; case ORC_X86_INSN_TYPE_LABEL: + case ORC_X86_INSN_TYPE_ALIGN: case ORC_X86_INSN_TYPE_NONE: op2_str[0] = 0; break; @@ -532,7 +540,7 @@ orc_x86_insn_output_opcode (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_IMM32_A: output_opcode (p, xinsn->opcode, xinsn->size, 0, 0); break; - case ORC_X86_INSN_TYPE_LABEL: + case ORC_X86_INSN_TYPE_ALIGN: { int diff; int i; @@ -542,6 +550,7 @@ orc_x86_insn_output_opcode (OrcCompiler *p, OrcX86Insn *xinsn) } } break; + case ORC_X86_INSN_TYPE_LABEL: case ORC_X86_INSN_TYPE_BRANCH: break; default: @@ -605,6 +614,7 @@ orc_x86_insn_output_modrm (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_IMM32_REGM_MOV: case ORC_X86_INSN_TYPE_IMM32_A: case ORC_X86_INSN_TYPE_NONE: + case ORC_X86_INSN_TYPE_ALIGN: break; case ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT: case ORC_X86_INSN_TYPE_IMM8_REGM: @@ -705,6 +715,7 @@ orc_x86_insn_output_immediate (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_MEM: case ORC_X86_INSN_TYPE_BRANCH: case ORC_X86_INSN_TYPE_LABEL: + case ORC_X86_INSN_TYPE_ALIGN: case ORC_X86_INSN_TYPE_NONE: break; default: @@ -728,6 +739,93 @@ orc_x86_get_output_insn (OrcCompiler *p) } void +orc_x86_recalc_offsets (OrcCompiler *p) +{ + OrcX86Insn *xinsn; + int i; + unsigned char *minptr; + + minptr = p->code; + p->codeptr = p->code; + for(i=0;i<p->n_output_insns;i++){ + unsigned char *ptr; + + xinsn = ((OrcX86Insn *)p->output_insns) + i; + + xinsn->code_offset = p->codeptr - p->code; + + ptr = p->codeptr; + orc_x86_insn_output_opcode (p, xinsn); + orc_x86_insn_output_modrm (p, 
xinsn); + orc_x86_insn_output_immediate (p, xinsn); + + if (xinsn->opcode->type == ORC_X86_INSN_TYPE_ALIGN) { + if (xinsn->size > 0) { + minptr += ((p->code - minptr)&((1<<xinsn->size) - 1)); + } + } else { + minptr += p->codeptr - ptr; + if (xinsn->opcode->type == ORC_X86_INSN_TYPE_BRANCH) { + if (xinsn->size == 4) minptr -= 4; + } + } + + } + + p->codeptr = p->code; + p->n_fixups = 0; +} + +void +orc_x86_calculate_offsets (OrcCompiler *p) +{ + OrcX86Insn *xinsn; + int i; + int j; + + orc_x86_recalc_offsets (p); + + for(j=0;j<3;j++){ + int change = FALSE; + + for(i=0;i<p->n_output_insns;i++){ + OrcX86Insn *dinsn; + int diff; + + xinsn = ((OrcX86Insn *)p->output_insns) + i; + if (xinsn->opcode->type != ORC_X86_INSN_TYPE_BRANCH) { + continue; + } + + dinsn = ((OrcX86Insn *)p->output_insns) + p->labels_int[xinsn->label]; + + if (xinsn->size == 1) { + diff = dinsn->code_offset - (xinsn->code_offset + 2); + if (diff < -128 || diff > 127) { + xinsn->size = 4; + ORC_DEBUG("%d: relaxing at %d,%04x diff %d", + j, i, xinsn->code_offset, diff); + change = TRUE; + } else { + } + } else { + diff = dinsn->code_offset - (xinsn->code_offset + 2); + if (diff >= -128 && diff <= 127) { + ORC_DEBUG("%d: unrelaxing at %d,%04x diff %d", + j, i, xinsn->code_offset, diff); + xinsn->size = 1; + change = TRUE; + } + } + } + + if (!change) break; + + orc_x86_recalc_offsets (p); + } +} + +void orc_x86_output_insns (OrcCompiler *p) { OrcX86Insn *xinsn; @@ -887,23 +985,33 @@ orc_x86_emit_cpuinsn_branch (OrcCompiler *p, int index, int label) xinsn->opcode_index = index; xinsn->opcode = opcode; xinsn->label = label; - xinsn->size = p->long_jumps ? 
4 : 1; + xinsn->size = 1; } void -orc_x86_emit_cpuinsn_label (OrcCompiler *p, int index, int label, - int align_shift) +orc_x86_emit_cpuinsn_align (OrcCompiler *p, int index, int align_shift) { OrcX86Insn *xinsn = orc_x86_get_output_insn (p); const OrcSysOpcode *opcode = orc_x86_opcodes + index; xinsn->opcode_index = index; xinsn->opcode = opcode; - xinsn->label = label; xinsn->size = align_shift; } void +orc_x86_emit_cpuinsn_label (OrcCompiler *p, int index, int label) +{ + OrcX86Insn *xinsn = orc_x86_get_output_insn (p); + const OrcSysOpcode *opcode = orc_x86_opcodes + index; + + xinsn->opcode_index = index; + xinsn->opcode = opcode; + xinsn->label = label; + x86_add_label2 (p, p->n_output_insns - 1, label); +} + +void orc_x86_emit_cpuinsn_none (OrcCompiler *p, int index) { OrcX86Insn *xinsn = orc_x86_get_output_insn (p); diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h index 52c50c9..09a0fad 100644 --- a/orc/orcx86insn.h +++ b/orc/orcx86insn.h @@ -24,6 +24,7 @@ typedef enum { ORC_X86_INSN_TYPE_REGM_REG, ORC_X86_INSN_TYPE_REG_REGM, ORC_X86_INSN_TYPE_LABEL, + ORC_X86_INSN_TYPE_ALIGN, ORC_X86_INSN_TYPE_BRANCH, ORC_X86_INSN_TYPE_NONE, ORC_X86_INSN_TYPE_STACK, @@ -271,7 +272,7 @@ typedef enum { ORC_X86_sar_imm, ORC_X86_sar, ORC_X86_and_imm32_a, - + ORC_X86_ALIGN, } OrcX86Opcode; enum { @@ -289,15 +290,16 @@ struct _OrcX86Insn { int dest; int size; int label; - int type; int offset; int index_reg; int shift; + int code_offset; }; OrcX86Insn * orc_x86_get_output_insn (OrcCompiler *p); void orc_x86_output_insns (OrcCompiler *p); +void orc_x86_calculate_offsets (OrcCompiler *p); -- 2.7.4