ORC_LOG("variables");
for(i=0;i<ORC_N_VARIABLES;i++){
if (program->vars[i].size > 0) {
- ORC_LOG("%d: %s %d %d", i,
+ ORC_LOG("%d: %s size %d type %d alloc %d", i,
program->vars[i].name,
program->vars[i].size,
- program->vars[i].vartype);
+ program->vars[i].vartype,
+ program->vars[i].alloc);
}
}
ORC_LOG("instructions");
orc_compiler_global_reg_alloc (compiler);
orc_compiler_rewrite_vars2 (compiler);
+#if 0
+ {
+ ORC_ERROR("variables");
+ for(i=0;i<ORC_N_VARIABLES;i++){
+ if (compiler->vars[i].size > 0) {
+ ORC_ERROR("%d: %s size %d type %d alloc %d [%d,%d]", i,
+ compiler->vars[i].name,
+ compiler->vars[i].size,
+ compiler->vars[i].vartype,
+ compiler->vars[i].alloc,
+ compiler->vars[i].first_use,
+ compiler->vars[i].last_use);
+ }
+ }
+ ORC_ERROR("instructions");
+ for(i=0;i<compiler->n_insns;i++){
+ ORC_ERROR("%d: %s %d %d %d %d", i,
+ compiler->insns[i].opcode->name,
+ compiler->insns[i].dest_args[0],
+ compiler->insns[i].dest_args[1],
+ compiler->insns[i].src_args[0],
+ compiler->insns[i].src_args[1]);
+ }
+ }
+#endif
+
if (compiler->error) goto error;
ORC_INFO("allocating code memory");
int var;
int actual_var;
+ for(j=0;j<ORC_N_VARIABLES;j++){
+ if (compiler->vars[j].alloc) continue;
+ compiler->vars[j].last_use = -1;
+ }
for(j=0;j<compiler->n_insns;j++){
insn = compiler->insns + j;
opcode = insn->opcode;
if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) {
compiler->vars[var].load_dest = TRUE;
}
+ if (compiler->vars[var].vartype == ORC_VAR_TYPE_SRC ||
+ compiler->vars[var].vartype == ORC_VAR_TYPE_DEST ||
+ compiler->vars[var].vartype == ORC_VAR_TYPE_CONST ||
+ compiler->vars[var].vartype == ORC_VAR_TYPE_PARAM) {
+ continue;
+ }
actual_var = var;
if (compiler->vars[var].replaced) {
var = insn->dest_args[k];
+ if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) {
+ continue;
+ }
if (compiler->vars[var].vartype == ORC_VAR_TYPE_SRC) {
ORC_COMPILER_ERROR(compiler,"using src var as dest");
compiler->result = ORC_COMPILE_RESULT_UNKNOWN_PARSE;
int i;
OrcVariable *var;
-
for(i=0;i<ORC_N_VARIABLES;i++){
var = compiler->vars + i;
if (var->name == NULL) continue;
if (compiler->error) break;
}
+ for(i=0;i<compiler->n_insns;i++){
+ OrcInstruction *insn = compiler->insns + i;
+ OrcStaticOpcode *opcode = insn->opcode;
+
+ if (opcode->flags & ORC_STATIC_OPCODE_INVARIANT) {
+ var = compiler->vars + insn->dest_args[0];
+
+ var->first_use = -1;
+ var->last_use = -1;
+ var->alloc = orc_compiler_allocate_register (compiler, TRUE);
+ compiler->insn_flags[i] |= ORC_INSN_FLAG_INVARIANT;
+ }
+ }
+
if (compiler->alloc_loop_counter && !compiler->error) {
compiler->loop_counter = orc_compiler_allocate_register (compiler, FALSE);
/* FIXME massive hack */
* - src1 must be last_use
* - only one dest
*/
+ if (compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT) continue;
+
if (!(compiler->insns[j].opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR)
&& compiler->insns[j].opcode->dest_size[1] == 0) {
int src1 = compiler->insns[j].src_args[0];
for(i=0;i<ORC_N_VARIABLES;i++){
if (compiler->vars[i].name == NULL) continue;
+ if (compiler->vars[i].last_use == -1) continue;
if (compiler->vars[i].first_use == j) {
if (compiler->vars[i].alloc) continue;
k = orc_compiler_allocate_register (compiler, TRUE);
unsigned int orc_compiler_sse_get_default_flags (void);
void orc_compiler_sse_assemble (OrcCompiler *compiler);
void orc_compiler_sse_register_rules (OrcTarget *target);
+void orc_sse_emit_invariants (OrcCompiler *compiler);
void orc_compiler_rewrite_vars (OrcCompiler *compiler);
break;
}
}
+
+ orc_sse_emit_invariants (compiler);
+
}
void
orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), compiler->exec_reg,
compiler->vars[i].ptr_register);
- } else {
- ORC_COMPILER_ERROR(compiler,"unimplemented");
}
break;
case ORC_VAR_TYPE_ACCUMULATOR:
insn = compiler->insns + j;
opcode = insn->opcode;
- ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name);
+ if (compiler->insn_flags[j] & ORC_INSN_FLAG_INVARIANT) continue;
-#if 0
- /* set up args */
- for(k=0;k<opcode->n_src + opcode->n_dest;k++){
- args[k] = compiler->vars + insn->args[k];
- ORC_ASM_CODE(compiler," %d", args[k]->alloc);
- if (args[k]->is_chained) {
- ORC_ASM_CODE(compiler," (chained)");
- }
- }
- ORC_ASM_CODE(compiler,"\n");
-#endif
+ ORC_ASM_CODE(compiler,"# %d: %s\n", j, insn->opcode->name);
rule = insn->rule;
if (rule && rule->emit) {
- if (!(insn->opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) &&
+ if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) &&
compiler->vars[insn->dest_args[0]].alloc !=
compiler->vars[insn->src_args[0]].alloc) {
orc_x86_emit_mov_sse_reg_reg (compiler,
}
}
+/*
+ * Emit code for every instruction whose entry in compiler->insn_flags has
+ * ORC_INSN_FLAG_INVARIANT set, walking the instruction list in order.
+ *
+ * Each flagged instruction is first announced in the assembly listing and
+ * then handed to its rule's emit callback.  A flagged instruction that has
+ * no rule, or a rule without an emit callback, is reported through
+ * ORC_COMPILER_ERROR; the walk then moves on to the next instruction.
+ */
+void
+orc_sse_emit_invariants (OrcCompiler *compiler)
+{
+  int idx;
+
+  for (idx = 0; idx < compiler->n_insns; idx++) {
+    OrcInstruction *cur;
+    OrcRule *emit_rule;
+
+    /* Only instructions flagged as invariant are handled here. */
+    if ((compiler->insn_flags[idx] & ORC_INSN_FLAG_INVARIANT) == 0) {
+      continue;
+    }
+
+    cur = &compiler->insns[idx];
+    ORC_ASM_CODE(compiler,"# %d: %s\n", idx, cur->opcode->name);
+
+    emit_rule = cur->rule;
+    if (emit_rule == NULL || emit_rule->emit == NULL) {
+      ORC_COMPILER_ERROR(compiler,"No rule for: %s", cur->opcode->name);
+      continue;
+    }
+
+    emit_rule->emit (compiler, emit_rule->emit_user, cur);
+  }
+}
+
OrcInstruction insns[ORC_N_INSNS];
int n_insns;
+ int insn_flags[ORC_N_INSNS];
OrcVariable vars[ORC_N_VARIABLES];
int n_temp_vars;
int offset;
};
+#define ORC_INSN_FLAG_INVARIANT 1 /* bit in compiler->insn_flags[]: set for instructions whose opcode has ORC_STATIC_OPCODE_INVARIANT; these are emitted by orc_sse_emit_invariants() and skipped by the per-instruction loops that test this flag */
+
#define ORC_SRC_ARG(p,i,n) ((p)->vars[(i)->src_args[(n)]].alloc)
#define ORC_DEST_ARG(p,i,n) ((p)->vars[(i)->dest_args[(n)]].alloc)
#define ORC_SRC_TYPE(p,i,n) ((p)->vars[(i)->src_args[(n)]].vartype)
if (src->vartype == ORC_VAR_TYPE_PARAM) {
reg = dest->alloc;
- switch (src->size) {
- case 1:
- orc_x86_emit_mov_memoffset_sse (compiler, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
- compiler->exec_reg, reg, FALSE);
- orc_sse_emit_punpcklbw (compiler, reg, reg);
- orc_sse_emit_pshuflw (compiler, 0, reg, reg);
- break;
- case 2:
- orc_x86_emit_mov_memoffset_sse (compiler, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
- compiler->exec_reg, reg, FALSE);
- orc_sse_emit_pshuflw (compiler, 0, reg, reg);
- break;
- case 4:
- orc_x86_emit_mov_memoffset_sse (compiler, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
- compiler->exec_reg, reg, FALSE);
- break;
+ orc_x86_emit_mov_memoffset_sse (compiler, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
+ compiler->exec_reg, reg, FALSE);
+ if (src->size == 1) {
+ orc_sse_emit_punpcklbw (compiler, reg, reg);
+ }
+ if (src->size <= 2) {
+ orc_sse_emit_pshuflw (compiler, 0, reg, reg);
}
orc_sse_emit_pshufd (compiler, 0, reg, reg);
} else if (src->vartype == ORC_VAR_TYPE_CONST) {
int src2 = p->vars[insn->src_args[1]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
int tmp = p->tmpreg;
+ int tmp2 = X86_XMM7;
- if (p->loop_shift < 2) {
- ORC_COMPILER_ERROR(p, "accsadubl SSE rule fails with loop_shift < 2");
- p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;
+ if (p->loop_shift == 0) {
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_pslldq (p, 15, tmp);
+ orc_sse_emit_movdqa (p, src2, tmp2);
+ orc_sse_emit_pslldq (p, 15, tmp2);
+ orc_sse_emit_psadbw (p, tmp2, tmp);
+ } else if (p->loop_shift == 1) {
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_pslldq (p, 14, tmp);
+ orc_sse_emit_movdqa (p, src2, tmp2);
+ orc_sse_emit_pslldq (p, 14, tmp2);
+ orc_sse_emit_psadbw (p, tmp2, tmp);
+ } else if (p->loop_shift == 2) {
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_psadbw (p, src2, tmp);
+ orc_sse_emit_pslldq (p, 12, tmp);
+ } else if (p->loop_shift == 3) {
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_psadbw (p, src2, tmp);
+ orc_sse_emit_pslldq (p, 8, tmp);
+ } else {
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_psadbw (p, src2, tmp);
}
-
- orc_sse_emit_movdqa (p, src1, tmp);
- orc_sse_emit_psadbw (p, src2, tmp);
orc_sse_emit_paddd (p, tmp, dest);
}
return 0;
}
- printf("%s\n", orc_program_get_asm_code (p));
+ //printf("%s\n", orc_program_get_asm_code (p));
}
ex = orc_executor_new (p);