orc_x86_emit_jmp (compiler, 7);
/* else, iterations are all unaligned: n1=n, n2=0, n3=0 */
- orc_x86_emit_label (compiler, 6, 0);
+ orc_x86_emit_label (compiler, 6);
orc_x86_emit_mov_memoffset_reg (compiler, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, X86_EAX);
orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg);
- orc_x86_emit_label (compiler, 7, 0);
+ orc_x86_emit_label (compiler, 7);
}
static void
compiler->asm_code = NULL;
compiler->asm_code_len = 0;
memset (compiler->labels, 0, sizeof (compiler->labels));
+ memset (compiler->labels_int, 0, sizeof (compiler->labels_int));
compiler->n_fixups = 0;
compiler->n_output_insns = 0;
}
compiler->exec_reg);
}
- orc_x86_emit_label (compiler, LABEL_OUTER_LOOP, 0);
+ orc_x86_emit_label (compiler, LABEL_OUTER_LOOP);
}
if (compiler->program->constant_n > 0 &&
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg);
orc_x86_emit_je (compiler, LABEL_STEP_UP(compiler->loop_shift));
orc_mmx_emit_loop (compiler, 0, 1<<compiler->loop_shift);
- orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift), 0);
+ orc_x86_emit_label (compiler, LABEL_STEP_UP(compiler->loop_shift));
}
compiler->loop_shift = save_loop_shift;
compiler->vars[align_var].is_aligned = TRUE;
}
- orc_x86_emit_label (compiler, LABEL_REGION1_SKIP, 0);
+ orc_x86_emit_label (compiler, LABEL_REGION1_SKIP);
orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg);
}
ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift);
- orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START, 0);
+ orc_x86_emit_align (compiler, 4);
+ orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START);
ui_max = 1<<compiler->unroll_shift;
for(ui=0;ui<ui_max;ui++) {
compiler->offset = ui<<compiler->loop_shift;
compiler->exec_reg);
}
orc_x86_emit_jne (compiler, LABEL_INNER_LOOP_START);
- orc_x86_emit_label (compiler, LABEL_REGION2_SKIP, 0);
+ orc_x86_emit_label (compiler, LABEL_REGION2_SKIP);
if (emit_region3) {
int save_loop_shift;
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg);
orc_x86_emit_je (compiler, LABEL_STEP_DOWN(compiler->loop_shift));
orc_mmx_emit_loop (compiler, 0, 1<<compiler->loop_shift);
- orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift), 0);
+ orc_x86_emit_label (compiler, LABEL_STEP_DOWN(compiler->loop_shift));
}
compiler->loop_shift = save_loop_shift;
(int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]),
compiler->exec_reg);
orc_x86_emit_jne (compiler, LABEL_OUTER_LOOP);
- orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP, 0);
+ orc_x86_emit_label (compiler, LABEL_OUTER_LOOP_SKIP);
}
mmx_save_accumulators (compiler);
#endif
orc_x86_emit_epilogue (compiler);
+ orc_x86_calculate_offsets (compiler);
orc_x86_output_insns (compiler);
orc_x86_do_fixups (compiler);
for(i=0;i<(1<<p->insn_shift);i++) {
orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4*i, p->exec_reg, X86_EAX);
- orc_x86_emit_cpuinsn_load_memoffset (p, ORC_X86_imul_rm, 4, 0,
- offset + 16 + 4*i, p->exec_reg, -1);
+ orc_x86_emit_cpuinsn_memoffset (p, ORC_X86_imul_rm, 4,
+ offset + 16 + 4*i, p->exec_reg);
orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, offset + 4*i, p->exec_reg);
}
#ifndef MMX
static void
+mmx_rule_mulslq_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  /*
+   * Slow-path rule for "mulslq": both source vectors are spilled to the
+   * executor scratch area at arrays[ORC_VAR_T1], each 32-bit lane pair is
+   * multiplied with a scalar imul, and the 64-bit products are written
+   * back to the scratch area and reloaded into the destination vector.
+   *
+   * Scratch layout, relative to `offset`:
+   *   +0  .. +7    source operand 0 (8 bytes)
+   *   +8  .. +15   source operand 1 (8 bytes)
+   *   +16 .. +31   products, 8 bytes per lane (EAX word, then EDX word)
+   *   +32, +40     saved EAX and EDX
+   *
+   * `user` is unused.
+   */
+  int i;
+  int regsize = p->is_64bit ? 8 : 4;
+  int offset = ORC_STRUCT_OFFSET(OrcExecutor,arrays[ORC_VAR_T1]);
+
+  /* Spill the two source operands so the scalar multiply can read them. */
+  orc_x86_emit_mov_mmx_memoffset (p, 8, p->vars[insn->src_args[0]].alloc,
+      offset, p->exec_reg, FALSE, FALSE);
+  orc_x86_emit_mov_mmx_memoffset (p, 8, p->vars[insn->src_args[1]].alloc,
+      offset + 8, p->exec_reg, FALSE, FALSE);
+
+  /* EAX and EDX are overwritten below; save them at native width. */
+  orc_x86_emit_mov_reg_memoffset (p, regsize, X86_EAX, offset + 32,
+      p->exec_reg);
+  orc_x86_emit_mov_reg_memoffset (p, regsize, X86_EDX, offset + 40,
+      p->exec_reg);
+
+  for(i=0;i<(1<<p->insn_shift);i++) {
+    orc_x86_emit_mov_memoffset_reg (p, 4, offset + 4*i, p->exec_reg, X86_EAX);
+    /* NOTE(review): presumably the one-operand imul, leaving the 64-bit
+     * product in EDX:EAX -- both registers are stored right after it. */
+    orc_x86_emit_cpuinsn_memoffset (p, ORC_X86_imul_rm, 4,
+        offset + 8 + 4*i, p->exec_reg);
+    orc_x86_emit_mov_reg_memoffset (p, 4, X86_EAX, offset + 16 + 8*i, p->exec_reg);
+    orc_x86_emit_mov_reg_memoffset (p, 4, X86_EDX, offset + 16 + 8*i + 4, p->exec_reg);
+  }
+
+  /* Reload the assembled products into the destination vector register. */
+  orc_x86_emit_mov_memoffset_mmx (p, 16, offset + 16, p->exec_reg,
+      p->vars[insn->dest_args[0]].alloc, FALSE);
+
+  /* Restore EAX/EDX with the same width that was used to save them. */
+  orc_x86_emit_mov_memoffset_reg (p, regsize, offset + 32, p->exec_reg,
+      X86_EAX);
+  orc_x86_emit_mov_memoffset_reg (p, regsize, offset + 40, p->exec_reg,
+      X86_EDX);
+}
+#endif
+
+#ifndef MMX
+static void
mmx_rule_mululq (OrcCompiler *p, void *user, OrcInstruction *insn)
{
int src = p->vars[insn->src_args[1]].alloc;
#ifndef MMX
orc_rule_register (rule_set, "mulhsl", mmx_rule_mulhsl_slow, NULL);
orc_rule_register (rule_set, "mulhul", mmx_rule_mulhul, NULL);
+ orc_rule_register (rule_set, "mulslq", mmx_rule_mulslq_slow, NULL);
#endif
orc_rule_register (rule_set, "mullb", mmx_rule_mullb, NULL);
orc_rule_register (rule_set, "mulhsb", mmx_rule_mulhsb, NULL);