From d35727f5f57c70626f0d300bc69421028775b04e Mon Sep 17 00:00:00 2001 From: David Schleef Date: Tue, 10 Aug 2010 19:39:41 -0700 Subject: [PATCH] Dynamically allocate temporary registers --- orc/orccompiler.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- orc/orcprogram-sse.c | 4 ++++ orc/orcprogram.h | 2 ++ orc/orcrules-sse.c | 47 +++++++++++++++++++++++------------------------ 4 files changed, 72 insertions(+), 26 deletions(-) diff --git a/orc/orccompiler.c b/orc/orccompiler.c index a5ffef4..3fa1d04 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -551,6 +551,45 @@ orc_compiler_assign_rules (OrcCompiler *compiler) } } +int +orc_compiler_get_temp_reg (OrcCompiler *compiler) +{ + int j; + + for(j=0;jalloc_regs[j] = 0; + } + for(j=0;jvars[j].alloc) continue; + + ORC_DEBUG("var %d: %d %d %d", j, compiler->vars[j].alloc, + compiler->vars[j].first_use, + compiler->vars[j].last_use); + + if (compiler->vars[j].first_use == -1) { + compiler->alloc_regs[compiler->vars[j].alloc] = 1; + } else if (compiler->vars[j].first_use <= compiler->insn_index && + compiler->vars[j].last_use >= compiler->insn_index) { + compiler->alloc_regs[compiler->vars[j].alloc] = 1; + } + } + + ORC_DEBUG("at insn %d", compiler->insn_index); + for(j=0;j<8;j++){ + ORC_DEBUG("xmm%d: %d %d", j, compiler->valid_regs[ORC_VEC_REG_BASE + j], + compiler->alloc_regs[ORC_VEC_REG_BASE + j]); + } + + for(j=compiler->min_temp_reg;jvalid_regs[j] && !compiler->alloc_regs[j]) { + compiler->min_temp_reg = j+1; + return j; + } + } + + return 0; +} + void orc_compiler_rewrite_vars (OrcCompiler *compiler) { @@ -877,6 +916,7 @@ int orc_compiler_get_constant (OrcCompiler *compiler, int size, int value) { int i; + int tmp; if (size < 4) { if (size < 2) { @@ -904,7 +944,8 @@ orc_compiler_get_constant (OrcCompiler *compiler, int size, int value) if (compiler->constants[i].alloc_reg != 0) {; return compiler->constants[i].alloc_reg; } - orc_compiler_load_constant (compiler, compiler->tmpreg, size, value); - return compiler->tmpreg; + tmp = orc_compiler_get_temp_reg (compiler); + orc_compiler_load_constant (compiler, tmp, size, value); + return tmp; } diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 9f8fe11..58b1e25 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -826,10 +826,14 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) insn = compiler->insns + j; opcode = insn->opcode; + compiler->insn_index = j; + if (insn->flags & ORC_INSN_FLAG_INVARIANT) continue; ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name); + compiler->min_temp_reg = ORC_VEC_REG_BASE; + rule = insn->rule; if (rule && rule->emit) { if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) && diff --git a/orc/orcprogram.h b/orc/orcprogram.h index b3862ff..97ea55c 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -431,6 +431,7 @@ struct _OrcCompiler { int has_iterator_opcode; int offset; + int min_temp_reg; }; #define ORC_SRC_ARG(p,i,n) ((p)->vars[(i)->src_args[(n)]].alloc) @@ -647,6 +648,7 @@ int orc_program_allocate_register (OrcProgram *program, int is_data); void orc_compiler_allocate_codemem (OrcCompiler *compiler); int orc_compiler_label_new (OrcCompiler *compiler); int orc_compiler_get_constant (OrcCompiler *compiler, int size, int value); +int orc_compiler_get_temp_reg (OrcCompiler *compiler); const char *orc_program_get_asm_code (OrcProgram *program); const char *orc_target_get_asm_preamble (const char *target); diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 4c6bbc5..f644e69 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -482,7 +482,7 @@ sse_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn) int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; #ifndef MMX - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); if (p->loop_shift <= 2) { orc_sse_emit_movdqa (p, src1, tmp); @@ -515,12 +515,11 @@ sse_rule_signX_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) int type = ORC_PTR_TO_INT(user); int tmpc; + tmpc = orc_compiler_get_constant (p, 1<vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int a = X86_XMM7; - int j = X86_XMM6; - int j2 = X86_XMM5; - int l = X86_XMM4; - int divisor = X86_XMM3; + int a = orc_compiler_get_temp_reg (p); + int j = orc_compiler_get_temp_reg (p); + int j2 = orc_compiler_get_temp_reg (p); + int l = orc_compiler_get_temp_reg (p); + int divisor = orc_compiler_get_temp_reg (p); int tmp; int i; @@ -1025,10 +1024,10 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) /* About 8.4 cycles per array member on ginger */ int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int b = X86_XMM7; - int a = X86_XMM6; - int k = X86_XMM5; - int j = X86_XMM4; + int b = orc_compiler_get_temp_reg (p); + int a = orc_compiler_get_temp_reg (p); + int k = orc_compiler_get_temp_reg (p); + int j = orc_compiler_get_temp_reg (p); int tmp; int i; @@ -1146,7 +1145,7 @@ sse_rule_mullb (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, dest, tmp); @@ -1169,7 +1168,7 @@ sse_rule_mulhsb (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_movdqa (p, dest, tmp2); @@ -1197,7 +1196,7 @@ sse_rule_mulhub (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_movdqa (p, dest, tmp2); @@ -1512,7 +1511,7 @@ sse_emit_load_mask (OrcCompiler *p, unsigned int mask1, unsigned int mask2) { int tmp = p->tmpreg; int gptmp = p->gp_tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); orc_x86_emit_mov_imm_reg (p, 4, mask1, gptmp); orc_x86_emit_mov_reg_sse (p, gptmp, tmp); @@ -1835,8 +1834,8 @@ sse_rule_addssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; #if 0 - int tmp2 = X86_XMM7; - int tmp3 = X86_XMM6; + int tmp2 = orc_compiler_get_temp_reg (p); + int tmp3 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_pand (p, dest, tmp); @@ -1866,8 +1865,8 @@ sse_rule_addssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_por (p, tmp2, dest); #endif - int s = X86_XMM7; - int t = X86_XMM6; + int s = orc_compiler_get_temp_reg (p); + int t = orc_compiler_get_temp_reg (p); /* From Tim Terriberry: (slightly faster than above) @@ -1914,8 +1913,8 @@ sse_rule_subssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; - int tmp3 = X86_XMM6; + int tmp2 = orc_compiler_get_temp_reg (p); + int tmp3 = orc_compiler_get_temp_reg (p); tmp = orc_compiler_get_constant (p, 4, 0xffffffff); orc_sse_emit_pxor (p, src, tmp); @@ -1953,7 +1952,7 @@ sse_rule_addusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); #if 0 /* an alternate version. slower. */ @@ -2000,7 +1999,7 @@ sse_rule_subusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; - int tmp2 = X86_XMM7; + int tmp2 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp2); orc_sse_emit_psrld (p, 1, tmp2); -- 2.7.4