From d9473016a6e6e61993431cd53ad755c4a9496a76 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Thu, 12 May 2011 11:28:32 -0700 Subject: [PATCH] mmx: convert from sse --- orc/orcprogram-mmx.c | 21 ++++++++++++--------- orc/orcrules-mmx.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/orc/orcprogram-mmx.c b/orc/orcprogram-mmx.c index 68e7975..258e0ba 100644 --- a/orc/orcprogram-mmx.c +++ b/orc/orcprogram-mmx.c @@ -694,7 +694,7 @@ orc_emit_split_3_regions (OrcCompiler *compiler) orc_x86_emit_jmp (compiler, 7); /* else, iterations are all unaligned: n1=n, n2=0, n3=0 */ - orc_x86_emit_label (compiler, 6, 0); + orc_x86_emit_label (compiler, 6); orc_x86_emit_mov_memoffset_reg (compiler, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, X86_EAX); @@ -706,7 +706,7 @@ orc_emit_split_3_regions (OrcCompiler *compiler) orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); - orc_x86_emit_label (compiler, 7, 0); + orc_x86_emit_label (compiler, 7); } static void @@ -787,6 +787,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) compiler->asm_code = NULL; compiler->asm_code_len = 0; memset (compiler->labels, 0, sizeof (compiler->labels)); + memset (compiler->labels_int, 0, sizeof (compiler->labels_int)); compiler->n_fixups = 0; compiler->n_output_insns = 0; } @@ -822,7 +823,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) compiler->exec_reg); } - orc_x86_emit_label (compiler, LABEL_OUTER_LOOP, 0); + orc_x86_emit_label (compiler, LABEL_OUTER_LOOP); } if (compiler->program->constant_n > 0 && @@ -901,14 +902,14 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); orc_x86_emit_je (compiler, LABEL_STEP_UP(compiler->loop_shift)); orc_mmx_emit_loop (compiler, 0, 1<<compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift), 0); + orc_x86_emit_label
(compiler, LABEL_STEP_UP(compiler->loop_shift)); } compiler->loop_shift = save_loop_shift; compiler->vars[align_var].is_aligned = TRUE; } - orc_x86_emit_label (compiler, LABEL_REGION1_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_REGION1_SKIP); orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); @@ -921,7 +922,8 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) } ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START, 0); + orc_x86_emit_align (compiler, 4); + orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START); ui_max = 1<<compiler->unroll_shift; for(ui=0;ui<ui_max;ui++) { compiler->offset = ui<<compiler->loop_shift; @@ -937,7 +939,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) compiler->exec_reg); } orc_x86_emit_jne (compiler, LABEL_INNER_LOOP_START); - orc_x86_emit_label (compiler, LABEL_REGION2_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_REGION2_SKIP); if (emit_region3) { int save_loop_shift; @@ -954,7 +956,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); orc_x86_emit_je (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); orc_mmx_emit_loop (compiler, 0, 1<<compiler->loop_shift); - orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift), 0); + orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift)); } compiler->loop_shift = save_loop_shift; @@ -968,7 +970,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]), compiler->exec_reg); orc_x86_emit_jne (compiler, LABEL_OUTER_LOOP); - orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP, 0); + orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP); } mmx_save_accumulators (compiler); @@ -982,6 +984,7 @@ orc_compiler_mmx_assemble (OrcCompiler *compiler) #endif orc_x86_emit_epilogue (compiler); + orc_x86_calculate_offsets (compiler); orc_x86_output_insns (compiler); orc_x86_do_fixups
(compiler); diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c index a98189f..923405b 100644 --- a/orc/orcrules-mmx.c +++ b/orc/orcrules-mmx.c @@ -1582,8 +1582,8 @@ mmx_rule_mulhsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) for(i=0;i<(1<<p->insn_shift);i++) { orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4*i, p->exec_reg, X86_EAX); - orc_x86_emit_cpuinsn_load_memoffset (p, ORC_X86_imul_rm, 4, 0, - offset + 16 + 4*i, p->exec_reg, -1); + orc_x86_emit_cpuinsn_memoffset (p, ORC_X86_imul_rm, 4, + offset + 16 + 4*i, p->exec_reg); orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, offset + 4*i, p->exec_reg); } @@ -1628,6 +1628,38 @@ mmx_rule_mulslq (OrcCompiler *p, void *user, OrcInstruction *insn) #ifndef MMX static void +mmx_rule_mulslq_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int i; + int regsize = p->is_64bit ? 8 : 4; + int offset = ORC_STRUCT_OFFSET(OrcExecutor,arrays[ORC_VAR_T1]); + + orc_x86_emit_mov_mmx_memoffset (p, 8, p->vars[insn->src_args[0]].alloc, + offset, p->exec_reg, FALSE, FALSE); + orc_x86_emit_mov_mmx_memoffset (p, 8, p->vars[insn->src_args[1]].alloc, + offset + 8, p->exec_reg, FALSE, FALSE); + orc_x86_emit_mov_reg_memoffset (p, regsize, X86_EAX, offset + 32, + p->exec_reg); + orc_x86_emit_mov_reg_memoffset (p, regsize, X86_EDX, offset + 40, + p->exec_reg); + + for(i=0;i<(1<<p->insn_shift);i++) { + orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4*i, p->exec_reg, X86_EAX); + orc_x86_emit_cpuinsn_memoffset (p, ORC_X86_imul_rm, 4, + offset + 8 + 4*i, p->exec_reg); + orc_x86_emit_mov_reg_memoffset (p, 4, X86_EAX, offset + 16 + 8*i, p->exec_reg); + orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, offset + 16 + 8*i + 4, p->exec_reg); + } + + orc_x86_emit_mov_memoffset_mmx (p, 16, offset + 16, p->exec_reg, + p->vars[insn->dest_args[0]].alloc, FALSE); + orc_x86_emit_mov_memoffset_reg (p, 8, offset + 32, p->exec_reg, X86_EAX); + orc_x86_emit_mov_memoffset_reg (p, 8, offset + 40, p->exec_reg, X86_EDX); +} +#endif + +#ifndef MMX +static void
mmx_rule_mululq (OrcCompiler *p, void *user, OrcInstruction *insn) { int src = p->vars[insn->src_args[1]].alloc; @@ -2889,6 +2921,7 @@ orc_compiler_mmx_register_rules (OrcTarget *target) #ifndef MMX orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl_slow, NULL); orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul, NULL); + orc_rule_register (rule_set, "mulslq", mmx_rule_mulslq_slow, NULL); #endif orc_rule_register (rule_set, "mullb", mmx_rule_mullb, NULL); orc_rule_register (rule_set, "mulhsb", mmx_rule_mulhsb, NULL); -- 2.7.4