static void
-orc_emit_split_n_regions (OrcCompiler *compiler)
+orc_emit_split_3_regions (OrcCompiler *compiler)
{
int align_var;
int align_shift;
orc_x86_emit_label (compiler, 7);
}
+static void
+orc_emit_split_2_regions (OrcCompiler *compiler)
+{ /* Two-region split of the executor's n.  Calls the x86 emitters to
+   * read the 4-byte OrcExecutor.n field and to store back:
+   *   counter2 = n arithmetically shifted right by
+   *              (loop_shift + unroll_shift)
+   *   counter3 = n & ((1 << (loop_shift + unroll_shift)) - 1)
+   * Nothing is written to a counter1 field here.  The visible call site
+   * selects this function when loop_shift > 0 and
+   * compiler->has_iterator_opcode is set. */
+ int align_var;
+ int align_shift;
+ int var_size_shift;
+
+ align_var = get_align_var (compiler);
+ var_size_shift = get_shift (compiler->vars[align_var].size); /* NOTE(review): align_var indexes vars[] unchecked - confirm get_align_var cannot return a negative value */
+ align_shift = var_size_shift + compiler->loop_shift; /* computed but not read again in this function */
+
+ /* Calculate n2: load n into gp_tmpreg, keep a copy in EAX for the n3
+  * step, shift gp_tmpreg right and store it as counter2 */
+ orc_x86_emit_mov_memoffset_reg (compiler, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_reg (compiler, 4, compiler->gp_tmpreg, X86_EAX);
+ orc_x86_emit_sar_imm_reg (compiler, 4,
+ compiler->loop_shift + compiler->unroll_shift,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg);
+
+ /* Calculate n3: mask the EAX copy of n down to the low
+  * (loop_shift + unroll_shift) bits and store it as counter3 */
+ orc_x86_emit_and_imm_reg (compiler, 4,
+ (1<<(compiler->loop_shift + compiler->unroll_shift))-1, X86_EAX);
+ orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg);
+}
+
#ifndef MMX
static int
orc_program_has_float (OrcCompiler *compiler)
compiler->program->constant_n <= ORC_SSE_ALIGNED_DEST_CUTOFF) {
/* don't need to load n */
} else if (compiler->loop_shift > 0) {
- /* split n into three regions, with center region being aligned */
- orc_emit_split_n_regions (compiler);
+ if (!compiler->has_iterator_opcode) {
+ /* split n into three regions, with center region being aligned */
+ orc_emit_split_3_regions (compiler);
+ } else {
+ orc_emit_split_2_regions (compiler);
+ }
} else {
/* loop shift is 0, no need to split */
orc_x86_emit_mov_memoffset_reg (compiler, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg, compiler->gp_tmpreg);
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg,
+ compiler->gp_tmpreg);
orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg);
}
}
}
compiler->loop_shift = save_loop_shift;
+
} else {
int ui, ui_max;
+ int emit_region1 = TRUE;
+ int emit_region3 = TRUE;
+
+ if (compiler->has_iterator_opcode) {
+ emit_region1 = FALSE;
+ }
+ if (compiler->loop_shift == 0) {
+ emit_region1 = FALSE;
+ emit_region3 = FALSE;
+ }
- if (compiler->loop_shift > 0) {
+ if (emit_region1) {
int save_loop_shift;
int l;
orc_x86_emit_jne (compiler, LABEL_INNER_LOOP_START);
orc_x86_emit_label (compiler, LABEL_REGION2_SKIP);
- if (compiler->loop_shift > 0) {
+ if (emit_region3) {
int save_loop_shift;
int l;
}
static void
+sse_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{ /* SSE rule registered for "loadoffb", "loadoffw" and "loadoffl".
+   * Emits a load of (src->size << loop_shift) bytes from the first
+   * source array, displaced by a constant element offset taken from
+   * the second source argument, into dest->alloc.  Reports a compiler
+   * error if the second argument is not a constant or if the byte
+   * count is not 1, 2, 4, 8 or 16.  `user` is not used. */
+ OrcVariable *src = compiler->vars + insn->src_args[0];
+ OrcVariable *dest = compiler->vars + insn->dest_args[0];
+ int ptr_reg;
+ int offset = 0;
+
+ if (compiler->vars[insn->src_args[1]].vartype != ORC_VAR_TYPE_CONST) { /* the offset operand must be a constant */
+ ORC_COMPILER_ERROR(compiler, "Rule only works with consts");
+ return;
+ }
+
+ offset = (compiler->offset + compiler->vars[insn->src_args[1]].value) *
+ src->size; /* byte displacement: (compiler->offset + constant) scaled by the element size */
+ if (src->ptr_register == 0) { /* no pointer register assigned: fetch arrays[i] from the executor into gp_tmpreg */
+ int i = insn->src_args[0];
+ orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]),
+ compiler->exec_reg, compiler->gp_tmpreg);
+ ptr_reg = compiler->gp_tmpreg;
+ } else {
+ ptr_reg = src->ptr_register;
+ }
+ switch (src->size << compiler->loop_shift) { /* dispatch on the number of bytes to load */
+ case 1: /* 1 and 2 bytes go through gp_tmpreg, then into the SSE register */
+ orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc);
+ break;
+ case 2:
+ orc_x86_emit_mov_memoffset_reg (compiler, 2, offset, ptr_reg,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc);
+ break;
+ case 4: /* 4, 8 and 16 bytes are loaded straight into the SSE register */
+ orc_x86_emit_mov_memoffset_sse (compiler, 4, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ case 8:
+ orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ case 16: /* NOTE(review): src->is_aligned is passed although `offset` may be a non-multiple of 16 - confirm an aligned load is never selected for a displaced address */
+ orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ default:
+ ORC_COMPILER_ERROR(compiler,"bad load size %d",
+ src->size << compiler->loop_shift);
+ break;
+ }
+}
+
+static void
+sse_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{ /* SSE rule registered for "loadupdb".  Three steps are emitted:
+   *  1. a load of half of (src->size << loop_shift) bytes from the
+   *     source array into dest->alloc (a single byte when that total
+   *     is 1 or 2);
+   *  2. an unpack-low of dest->alloc with itself, chosen by src->size
+   *     (presumably duplicating every loaded element - confirm
+   *     against the punpckl* semantics);
+   *  3. a negative adjustment of the source pointer (see the FIXME).
+   * `user` is not used. */
+ OrcVariable *src = compiler->vars + insn->src_args[0];
+ OrcVariable *dest = compiler->vars + insn->dest_args[0];
+ int ptr_reg;
+ int offset = 0;
+
+ offset = compiler->offset * src->size; /* byte displacement for compiler->offset elements */
+ if (src->ptr_register == 0) { /* no pointer register assigned: fetch arrays[i] from the executor into gp_tmpreg */
+ int i = insn->src_args[0];
+ orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]),
+ compiler->exec_reg, compiler->gp_tmpreg);
+ ptr_reg = compiler->gp_tmpreg;
+ } else {
+ ptr_reg = src->ptr_register;
+ }
+ switch (src->size << compiler->loop_shift) { /* case labels are the full width; each load below is half of it */
+ case 1: /* shares the 1-byte load with case 2 */
+ case 2:
+ orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc);
+ break;
+ case 4:
+ orc_x86_emit_mov_memoffset_reg (compiler, 2, offset, ptr_reg,
+ compiler->gp_tmpreg);
+ orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc);
+ break;
+ case 8:
+ orc_x86_emit_mov_memoffset_sse (compiler, 4, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ case 16:
+ orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ case 32:
+ orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, ptr_reg,
+ dest->alloc, src->is_aligned);
+ break;
+ default: /* reports the error but does not return; the unpack and pointer adjustment below are still emitted */
+ ORC_COMPILER_ERROR(compiler,"bad load size %d",
+ src->size << compiler->loop_shift);
+ break;
+ }
+ switch (src->size) { /* unpack width follows the element size; other sizes emit nothing */
+ case 1:
+ orc_sse_emit_punpcklbw (compiler, dest->alloc, dest->alloc);
+ break;
+ case 2:
+ orc_sse_emit_punpcklwd (compiler, dest->alloc, dest->alloc);
+ break;
+ case 4:
+ orc_sse_emit_punpckldq (compiler, dest->alloc, dest->alloc);
+ break;
+ }
+ /* FIXME hack: add a negative immediate to the source pointer, either
+  * in its register or in the executor's arrays[] slot.  Presumably this
+  * offsets the pointer advance done elsewhere so the source moves at
+  * half rate - confirm against the loop code.  The unary minus binds
+  * tighter than >>, so the expression right-shifts a negative value,
+  * which is implementation-defined in C. */
+ if (src->ptr_register) {
+ orc_x86_emit_add_imm_reg (compiler, compiler->is_64bit ? 8 : 4,
+ -(src->size << compiler->loop_shift)>>1,
+ src->ptr_register, FALSE);
+ } else {
+ orc_x86_emit_add_imm_memoffset (compiler, compiler->is_64bit ? 8 : 4,
+ -(src->size << compiler->loop_shift)>>1,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[insn->src_args[0]]),
+ compiler->exec_reg);
+ }
+}
+
+static void
sse_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
OrcVariable *src = compiler->vars + insn->src_args[0];
orc_rule_register (rule_set, "loadb", sse_rule_loadX, NULL);
orc_rule_register (rule_set, "loadw", sse_rule_loadX, NULL);
orc_rule_register (rule_set, "loadl", sse_rule_loadX, NULL);
+ orc_rule_register (rule_set, "loadoffb", sse_rule_loadoffX, NULL);
+ orc_rule_register (rule_set, "loadoffw", sse_rule_loadoffX, NULL);
+ orc_rule_register (rule_set, "loadoffl", sse_rule_loadoffX, NULL);
+ orc_rule_register (rule_set, "loadupdb", sse_rule_loadupdb, NULL);
orc_rule_register (rule_set, "loadpb", sse_rule_loadpX, NULL);
orc_rule_register (rule_set, "loadpw", sse_rule_loadpX, NULL);
orc_rule_register (rule_set, "loadpl", sse_rule_loadpX, NULL);