From 41898c8acda7f9c20a452ab961b27d3c3322c1c2 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Wed, 11 Aug 2010 03:00:54 -0700 Subject: [PATCH] Use instruction size for loadp, not var size --- orc/orccompiler.c | 9 +++++++-- orc/orcrules-sse.c | 24 +++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/orc/orccompiler.c b/orc/orccompiler.c index f2b18eb..1beb277 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -492,7 +492,9 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) cinsn = compiler->insns + compiler->n_insns; compiler->n_insns++; + cinsn->flags = insn.flags; cinsn->flags |= ORC_INSN_FLAG_ADDED; + cinsn->flags &= ~(ORC_INSTRUCTION_FLAG_X2|ORC_INSTRUCTION_FLAG_X4); cinsn->opcode = get_load_opcode_for_size (var->size); cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size); cinsn->src_args[0] = insn.src_args[i]; @@ -504,9 +506,10 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) cinsn = compiler->insns + compiler->n_insns; compiler->n_insns++; + cinsn->flags = insn.flags; cinsn->flags |= ORC_INSN_FLAG_ADDED; - cinsn->opcode = get_loadp_opcode_for_size (var->size); - cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size); + cinsn->opcode = get_loadp_opcode_for_size (opcode->src_size[i]); + cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, opcode->src_size[i]); cinsn->src_args[0] = insn.src_args[i]; insn.src_args[i] = cinsn->dest_args[0]; } @@ -530,7 +533,9 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) cinsn = compiler->insns + compiler->n_insns; compiler->n_insns++; + cinsn->flags = xinsn->flags; cinsn->flags |= ORC_INSN_FLAG_ADDED; + cinsn->flags &= ~(ORC_INSTRUCTION_FLAG_X2|ORC_INSTRUCTION_FLAG_X4); cinsn->opcode = get_store_opcode_for_size (var->size); cinsn->src_args[0] = orc_compiler_new_temporary (compiler, var->size); cinsn->dest_args[0] = xinsn->dest_args[i]; diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index e1fb1d9..3c99f1d 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -22,6 +22,7 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) OrcVariable *src = compiler->vars + insn->src_args[0]; OrcVariable *dest = compiler->vars + insn->dest_args[0]; int reg; + int size = ORC_PTR_TO_INT(user); if (src->vartype == ORC_VAR_TYPE_PARAM) { reg = dest->alloc; @@ -29,23 +30,23 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_x86_emit_mov_memoffset_sse (compiler, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), compiler->exec_reg, reg, FALSE); - if (src->size == 1) { + if (size == 1) { orc_sse_emit_punpcklbw (compiler, reg, reg); } #ifndef MMX - if (src->size <= 2) { + if (size <= 2) { orc_sse_emit_pshuflw (compiler, 0, reg, reg); } orc_sse_emit_pshufd (compiler, 0, reg, reg); #else - if (src->size <= 2) { + if (size <= 2) { orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg); } else { orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg); } #endif } else if (src->vartype == ORC_VAR_TYPE_CONST) { - sse_load_constant (compiler, dest->alloc, src->size, src->value); + sse_load_constant (compiler, dest->alloc, size, src->value); } else { ORC_ASSERT(0); } @@ -957,13 +958,14 @@ sse_rule_div255w (OrcCompiler *p, void *user, OrcInstruction *insn) { int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - int tmp; + int tmp = orc_compiler_get_temp_reg (p); + int tmpc; if (src != dest) { orc_sse_emit_movdqa (p, src, dest); } - tmp = orc_compiler_get_temp_constant (p, 2, 0x0080); - orc_sse_emit_paddw (p, tmp, dest); + tmpc = orc_compiler_get_constant (p, 2, 0x0080); + orc_sse_emit_paddw (p, tmpc, dest); orc_sse_emit_movdqa (p, dest, tmp); orc_sse_emit_psrlw (p, 8, tmp); orc_sse_emit_paddw (p, tmp, dest); @@ -2163,10 +2165,10 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "loadoffl", sse_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadupdb", sse_rule_loadupdb, NULL); orc_rule_register (rule_set, "loadupib", sse_rule_loadupib, NULL); - orc_rule_register (rule_set, "loadpb", sse_rule_loadpX, NULL); - orc_rule_register (rule_set, "loadpw", sse_rule_loadpX, NULL); - orc_rule_register (rule_set, "loadpl", sse_rule_loadpX, NULL); - orc_rule_register (rule_set, "loadpq", sse_rule_loadpX, NULL); + orc_rule_register (rule_set, "loadpb", sse_rule_loadpX, (void *)1); + orc_rule_register (rule_set, "loadpw", sse_rule_loadpX, (void *)2); + orc_rule_register (rule_set, "loadpl", sse_rule_loadpX, (void *)4); + orc_rule_register (rule_set, "loadpq", sse_rule_loadpX, (void *)8); orc_rule_register (rule_set, "storeb", sse_rule_storeX, NULL); orc_rule_register (rule_set, "storew", sse_rule_storeX, NULL); -- 2.7.4