From: David Schleef Date: Sun, 1 Aug 2010 22:41:36 +0000 (-0700) Subject: Better handling of invariants X-Git-Tag: orc-0.4.7~98 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=28134046e7484ca3432fd7f824a8bf0dd3c246e6;p=platform%2Fupstream%2Forc.git Better handling of invariants --- diff --git a/orc/orccompiler.c b/orc/orccompiler.c index 90dadbd..c5a6855 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -212,10 +212,11 @@ orc_program_compile_full (OrcProgram *program, OrcTarget *target, ORC_LOG("variables"); for(i=0;i<ORC_N_VARIABLES;i++){ if (program->vars[i].size > 0) { - ORC_LOG("%d: %s %d %d", i, + ORC_LOG("%d: %s size %d type %d alloc %d", i, program->vars[i].name, program->vars[i].size, - program->vars[i].vartype); + program->vars[i].vartype, + program->vars[i].alloc); } } ORC_LOG("instructions"); @@ -259,6 +260,32 @@ orc_program_compile_full (OrcProgram *program, OrcTarget *target, orc_compiler_global_reg_alloc (compiler); orc_compiler_rewrite_vars2 (compiler); +#if 0 + { + ORC_ERROR("variables"); + for(i=0;i<ORC_N_VARIABLES;i++){ + if (compiler->vars[i].size > 0) { + ORC_ERROR("%d: %s size %d type %d alloc %d [%d,%d]", i, + compiler->vars[i].name, + compiler->vars[i].size, + compiler->vars[i].vartype, + compiler->vars[i].alloc, + compiler->vars[i].first_use, + compiler->vars[i].last_use); + } + } + ORC_ERROR("instructions"); + for(i=0;i<compiler->n_insns;i++){ + ORC_ERROR("%d: %s %d %d %d %d", i, + compiler->insns[i].opcode->name, + compiler->insns[i].dest_args[0], + compiler->insns[i].dest_args[1], + compiler->insns[i].src_args[0], + compiler->insns[i].src_args[1]); + } + } +#endif + if (compiler->error) goto error; ORC_INFO("allocating code memory"); @@ -467,6 +494,10 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) int var; int actual_var; + for(j=0;j<ORC_N_VARIABLES;j++){ + if (compiler->vars[j].alloc) continue; + compiler->vars[j].last_use = -1; + } for(j=0;j<compiler->n_insns;j++){ insn = compiler->insns + j; opcode = insn->opcode; @@ -479,6 +510,12 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) { 
compiler->vars[var].load_dest = TRUE; } + if (compiler->vars[var].vartype == ORC_VAR_TYPE_SRC || + compiler->vars[var].vartype == ORC_VAR_TYPE_DEST || + compiler->vars[var].vartype == ORC_VAR_TYPE_CONST || + compiler->vars[var].vartype == ORC_VAR_TYPE_PARAM) { + continue; + } actual_var = var; if (compiler->vars[var].replaced) { @@ -502,6 +539,9 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) var = insn->dest_args[k]; + if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) { + continue; + } if (compiler->vars[var].vartype == ORC_VAR_TYPE_SRC) { ORC_COMPILER_ERROR(compiler,"using src var as dest"); compiler->result = ORC_COMPILE_RESULT_UNKNOWN_PARSE; @@ -562,7 +602,6 @@ orc_compiler_global_reg_alloc (OrcCompiler *compiler) int i; OrcVariable *var; - for(i=0;i<ORC_N_VARIABLES;i++){ var = compiler->vars + i; if (var->name == NULL) continue; @@ -598,6 +637,20 @@ orc_compiler_global_reg_alloc (OrcCompiler *compiler) if (compiler->error) break; } + for(i=0;i<compiler->n_insns;i++){ + OrcInstruction *insn = compiler->insns + i; + OrcStaticOpcode *opcode = insn->opcode; + + if (opcode->flags & ORC_STATIC_OPCODE_INVARIANT) { + var = compiler->vars + insn->dest_args[0]; + + var->first_use = -1; + var->last_use = -1; + var->alloc = orc_compiler_allocate_register (compiler, TRUE); + compiler->insn_flags[i] |= ORC_INSN_FLAG_INVARIANT; + } + } + if (compiler->alloc_loop_counter && !compiler->error) { compiler->loop_counter = orc_compiler_allocate_register (compiler, FALSE); /* FIXME massive hack */ @@ -622,6 +675,8 @@ orc_compiler_rewrite_vars2 (OrcCompiler *compiler) * - src1 must be last_use * - only one dest */ + if (compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT) continue; + if (!(compiler->insns[j].opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) && compiler->insns[j].opcode->dest_size[1] == 0) { int src1 = compiler->insns[j].src_args[0]; @@ -651,6 +706,7 @@ orc_compiler_rewrite_vars2 (OrcCompiler *compiler) for(i=0;i<ORC_N_VARIABLES;i++){ if (compiler->vars[i].name == NULL) continue; + if (compiler->vars[i].last_use == -1) continue; if 
(compiler->vars[i].first_use == j) { if (compiler->vars[i].alloc) continue; k = orc_compiler_allocate_register (compiler, TRUE); diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index a71ab16..eb97fbe 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -23,6 +23,7 @@ void orc_compiler_sse_init (OrcCompiler *compiler); unsigned int orc_compiler_sse_get_default_flags (void); void orc_compiler_sse_assemble (OrcCompiler *compiler); void orc_compiler_sse_register_rules (OrcTarget *target); +void orc_sse_emit_invariants (OrcCompiler *compiler); void orc_compiler_rewrite_vars (OrcCompiler *compiler); @@ -303,6 +304,9 @@ sse_load_constants_outer (OrcCompiler *compiler) break; } } + + orc_sse_emit_invariants (compiler); + } void @@ -322,8 +326,6 @@ sse_load_constants_inner (OrcCompiler *compiler) orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4, (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), compiler->exec_reg, compiler->vars[i].ptr_register); - } else { - ORC_COMPILER_ERROR(compiler,"unimplemented"); } break; case ORC_VAR_TYPE_ACCUMULATOR: @@ -687,23 +689,13 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) insn = compiler->insns + j; opcode = insn->opcode; - ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name); + if (compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT) continue; -#if 0 - /* set up args */ - for(k=0;k<opcode->n_src + opcode->n_dest;k++){ - args[k] = compiler->vars + insn->args[k]; - ORC_ASM_CODE(compiler," %d", args[k]->alloc); - if (args[k]->is_chained) { - ORC_ASM_CODE(compiler," (chained)"); - } - } - ORC_ASM_CODE(compiler,"\n"); -#endif + ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name); rule = insn->rule; if (rule && rule->emit) { - if (!(insn->opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) && + if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) && compiler->vars[insn->dest_args[0]].alloc != compiler->vars[insn->src_args[0]].alloc) 
{ orc_x86_emit_mov_sse_reg_reg (compiler, @@ -736,3 +728,28 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) } } +void +orc_sse_emit_invariants (OrcCompiler *compiler) +{ + int j; + OrcInstruction *insn; + OrcStaticOpcode *opcode; + OrcRule *rule; + + for(j=0;j<compiler->n_insns;j++){ + insn = compiler->insns + j; + opcode = insn->opcode; + + if (!(compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT)) continue; + + ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name); + + rule = insn->rule; + if (rule && rule->emit) { + rule->emit (compiler, rule->emit_user, insn); + } else { + ORC_COMPILER_ERROR(compiler,"No rule for: %s", opcode->name); + } + } +} + diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 1cff1f8..ae5e558 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -363,6 +363,7 @@ struct _OrcCompiler { OrcInstruction insns[ORC_N_INSNS]; int n_insns; + int insn_flags[ORC_N_INSNS]; OrcVariable vars[ORC_N_VARIABLES]; int n_temp_vars; @@ -410,6 +411,8 @@ struct _OrcCompiler { int offset; }; +#define ORC_INSN_FLAG_INVARIANT 1 + #define ORC_SRC_ARG(p,i,n) ((p)->vars[(i)->src_args[(n)]].alloc) #define ORC_DEST_ARG(p,i,n) ((p)->vars[(i)->dest_args[(n)]].alloc) #define ORC_SRC_TYPE(p,i,n) ((p)->vars[(i)->src_args[(n)]].vartype) diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 922cb96..14c9290 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -25,25 +25,14 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) if (src->vartype == ORC_VAR_TYPE_PARAM) { reg = dest->alloc; - switch (src->size) { - case 1: - orc_x86_emit_mov_memoffset_sse (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), - compiler->exec_reg, reg, FALSE); - orc_sse_emit_punpcklbw (compiler, reg, reg); - orc_sse_emit_pshuflw (compiler, 0, reg, reg); - break; - case 2: - orc_x86_emit_mov_memoffset_sse (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), - compiler->exec_reg, reg, FALSE); - 
orc_sse_emit_pshuflw (compiler, 0, reg, reg); - break; - case 4: - orc_x86_emit_mov_memoffset_sse (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), - compiler->exec_reg, reg, FALSE); - break; + orc_x86_emit_mov_memoffset_sse (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), + compiler->exec_reg, reg, FALSE); + if (src->size == 1) { + orc_sse_emit_punpcklbw (compiler, reg, reg); + } + if (src->size <= 2) { + orc_sse_emit_pshuflw (compiler, 0, reg, reg); } orc_sse_emit_pshufd (compiler, 0, reg, reg); } else if (src->vartype == ORC_VAR_TYPE_CONST) { @@ -299,14 +288,32 @@ sse_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn) int src2 = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; + int tmp2 = X86_XMM7; - if (p->loop_shift < 2) { - ORC_COMPILER_ERROR(p, "accsadubl SSE rule fails with loop_shift < 2"); - p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE; + if (p->loop_shift == 0) { + orc_sse_emit_movdqa (p, src1, tmp); + orc_sse_emit_pslldq (p, 15, tmp); + orc_sse_emit_movdqa (p, src2, tmp2); + orc_sse_emit_pslldq (p, 15, tmp2); + orc_sse_emit_psadbw (p, tmp2, tmp); + } else if (p->loop_shift == 1) { + orc_sse_emit_movdqa (p, src1, tmp); + orc_sse_emit_pslldq (p, 14, tmp); + orc_sse_emit_movdqa (p, src2, tmp2); + orc_sse_emit_pslldq (p, 14, tmp2); + orc_sse_emit_psadbw (p, tmp2, tmp); + } else if (p->loop_shift == 2) { + orc_sse_emit_movdqa (p, src1, tmp); + orc_sse_emit_psadbw (p, src2, tmp); + orc_sse_emit_pslldq (p, 12, tmp); + } else if (p->loop_shift == 3) { + orc_sse_emit_movdqa (p, src1, tmp); + orc_sse_emit_psadbw (p, src2, tmp); + orc_sse_emit_pslldq (p, 8, tmp); + } else { + orc_sse_emit_movdqa (p, src1, tmp); + orc_sse_emit_psadbw (p, src2, tmp); } - - orc_sse_emit_movdqa (p, src1, tmp); - orc_sse_emit_psadbw (p, src2, tmp); orc_sse_emit_paddd (p, tmp, dest); } diff --git a/testsuite/test_accsadubl.c b/testsuite/test_accsadubl.c 
index b64656b..1388045 100644 --- a/testsuite/test_accsadubl.c +++ b/testsuite/test_accsadubl.c @@ -70,7 +70,7 @@ orc_sad_u8 (orc_uint8 *s1, orc_uint8 *s2, int n) return 0; } - printf("%s\n", orc_program_get_asm_code (p)); + //printf("%s\n", orc_program_get_asm_code (p)); } ex = orc_executor_new (p);