BINARY_SB(cmpeqb, "(%s == %s) ? (~0) : 0")
BINARY_SB(cmpgtsb, "(%s > %s) ? (~0) : 0")
UNARY_SB(copyb, "%s")
+UNARY_SB(loadb, "%s")
BINARY_SB(maxsb, "ORC_MAX(%s, %s)")
BINARY_UB(maxub, "ORC_MAX((orc_uint8)%s, (orc_uint8)%s)")
BINARY_SB(minsb, "ORC_MIN(%s, %s)")
BINARY_SB(shrsb, "%s >> %s")
BINARY_UB(shrub, "((orc_uint8)%s) >> %s")
UNARY_SB(signb, "ORC_CLAMP(%s,-1,1)")
+UNARY_SB(storeb, "%s")
BINARY_SB(subb, "%s - %s")
BINARY_SB(subssb, "ORC_CLAMP_SB(%s - %s)")
BINARY_UB(subusb, "ORC_CLAMP_UB((orc_uint8)%s - (orc_uint8)%s)")
BINARY_SW(cmpeqw, "(%s == %s) ? (~0) : 0")
BINARY_SW(cmpgtsw, "(%s > %s) ? (~0) : 0")
UNARY_SW(copyw, "%s")
+UNARY_SW(loadw, "%s")
BINARY_SW(maxsw, "ORC_MAX(%s, %s)")
BINARY_UW(maxuw, "ORC_MAX((orc_uint16)%s, (orc_uint16)%s)")
BINARY_SW(minsw, "ORC_MIN(%s, %s)")
BINARY_SW(shrsw, "%s >> %s")
BINARY_UW(shruw, "((orc_uint16)%s) >> %s")
UNARY_SW(signw, "ORC_CLAMP(%s,-1,1)")
+UNARY_SW(storew, "%s")
BINARY_SW(subw, "%s - %s")
BINARY_SW(subssw, "ORC_CLAMP_SW(%s - %s)")
BINARY_UW(subusw, "ORC_CLAMP_UW((orc_uint16)%s - (orc_uint16)%s)")
BINARY_SL(cmpeql, "(%s == %s) ? (~0) : 0")
BINARY_SL(cmpgtsl, "(%s > %s) ? (~0) : 0")
UNARY_SL(copyl, "%s")
+UNARY_SL(loadl, "%s")
BINARY_SL(maxsl, "ORC_MAX(%s, %s)")
BINARY_UL(maxul, "ORC_MAX((orc_uint32)%s, (orc_uint32)%s)")
BINARY_SL(minsl, "ORC_MIN(%s, %s)")
BINARY_SL(shrsl, "%s >> %s")
BINARY_UL(shrul, "((orc_uint32)%s) >> %s")
UNARY_SL(signl, "ORC_CLAMP(%s,-1,1)")
+UNARY_SL(storel, "%s")
BINARY_SL(subl, "%s - %s")
BINARY_SL(subssl, "ORC_CLAMP_SL((orc_int64)%s - (orc_int64)%s)")
BINARY_UL(subusl, "ORC_CLAMP_UL((orc_int64)(orc_uint32)%s - (orc_int64)(orc_uint32)%s)")
void orc_compiler_assign_rules (OrcCompiler *compiler);
void orc_compiler_global_reg_alloc (OrcCompiler *compiler);
+void orc_compiler_rewrite_insns (OrcCompiler *compiler);
void orc_compiler_rewrite_vars (OrcCompiler *compiler);
void orc_compiler_rewrite_vars2 (OrcCompiler *compiler);
int orc_compiler_dup_temporary (OrcCompiler *compiler, int var, int j);
+int orc_compiler_new_temporary (OrcCompiler *compiler, int size);
void orc_compiler_check_sizes (OrcCompiler *compiler);
static char **_orc_compiler_flag_list;
orc_compiler_check_sizes (compiler);
if (compiler->error) goto error;
+ orc_compiler_rewrite_insns (compiler);
+ if (compiler->error) goto error;
+
orc_compiler_assign_rules (compiler);
if (compiler->error) goto error;
}
+/*
+ * Copy the program's instructions into compiler->insns, inserting explicit
+ * load and store instructions around them.
+ *
+ * For every instruction whose opcode is not itself flagged
+ * ORC_STATIC_OPCODE_LOAD, each used source argument that refers to a SRC or
+ * DEST variable is replaced by a fresh temporary, and a loadb/loadw/loadl
+ * instruction filling that temporary is placed in front of it.  For every
+ * instruction whose opcode is not flagged ORC_STATIC_OPCODE_STORE, each used
+ * destination argument that refers to a DEST variable is replaced by a fresh
+ * temporary, and a storeb/storew/storel instruction is placed after it.
+ *
+ * The opcode width is chosen from the variable size: 1 -> b, 2 -> w,
+ * anything else -> l.
+ *
+ * If a load/store opcode cannot be looked up, an error is reported through
+ * ORC_COMPILER_ERROR and rewriting stops.
+ *
+ * NOTE(review): nothing here checks compiler->n_insns against the capacity
+ * of compiler->insns; the limit is not visible from this function.
+ */
void
+orc_compiler_rewrite_insns (OrcCompiler *compiler)
+{
+  OrcProgram *program = compiler->program;
+  int i;
+  int j;
+
+  compiler->n_insns = 0;
+  for(j=0;j<program->n_insns;j++){
+    OrcInstruction insn;
+    OrcInstruction *xinsn;
+    OrcStaticOpcode *opcode;
+
+    /* Work on a private copy so the program's own instruction is untouched */
+    memcpy (&insn, program->insns + j, sizeof(OrcInstruction));
+    opcode = insn.opcode;
+
+    if (!(opcode->flags & ORC_STATIC_OPCODE_LOAD)) {
+      for(i=0;i<ORC_STATIC_OPCODE_N_SRC;i++){
+        OrcVariable *var;
+        OrcInstruction *cinsn;
+        OrcStaticOpcode *load_opcode;
+        const char *load_name;
+
+        if (opcode->src_size[i] == 0) continue;
+
+        var = compiler->vars + insn.src_args[i];
+        if (var->vartype != ORC_VAR_TYPE_SRC &&
+            var->vartype != ORC_VAR_TYPE_DEST) continue;
+
+        load_name = (var->size == 1) ? "loadb" :
+            (var->size == 2) ? "loadw" : "loadl";
+
+        /* Look the opcode up before claiming a slot, so a failure leaves
+         * no half-built instruction behind.
+         * NOTE(review): assumes the lookup yields NULL when nothing matches */
+        load_opcode = orc_opcode_find_by_name (load_name);
+        if (load_opcode == NULL) {
+          ORC_COMPILER_ERROR(compiler, "opcode %s not found", load_name);
+          return;
+        }
+
+        cinsn = compiler->insns + compiler->n_insns;
+        compiler->n_insns++;
+
+        /* Start from a clean slot: only a few fields are assigned below */
+        memset (cinsn, 0, sizeof(OrcInstruction));
+        cinsn->opcode = load_opcode;
+        cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size);
+        cinsn->src_args[0] = insn.src_args[i];
+        insn.src_args[i] = cinsn->dest_args[0];
+      }
+    }
+
+    /* The (possibly re-targeted) original instruction follows its loads */
+    xinsn = compiler->insns + compiler->n_insns;
+    memcpy (xinsn, &insn, sizeof(OrcInstruction));
+    compiler->n_insns++;
+
+    if (!(opcode->flags & ORC_STATIC_OPCODE_STORE)) {
+      for(i=0;i<ORC_STATIC_OPCODE_N_DEST;i++){
+        OrcVariable *var;
+        OrcInstruction *cinsn;
+        OrcStaticOpcode *store_opcode;
+        const char *store_name;
+
+        if (opcode->dest_size[i] == 0) continue;
+
+        var = compiler->vars + insn.dest_args[i];
+        if (var->vartype != ORC_VAR_TYPE_DEST) continue;
+
+        store_name = (var->size == 1) ? "storeb" :
+            (var->size == 2) ? "storew" : "storel";
+
+        store_opcode = orc_opcode_find_by_name (store_name);
+        if (store_opcode == NULL) {
+          ORC_COMPILER_ERROR(compiler, "opcode %s not found", store_name);
+          return;
+        }
+
+        cinsn = compiler->insns + compiler->n_insns;
+        compiler->n_insns++;
+
+        memset (cinsn, 0, sizeof(OrcInstruction));
+        cinsn->opcode = store_opcode;
+        cinsn->src_args[0] = orc_compiler_new_temporary (compiler, var->size);
+        cinsn->dest_args[0] = xinsn->dest_args[i];
+        /* The already-copied instruction now writes the temporary instead */
+        xinsn->dest_args[i] = cinsn->src_args[0];
+      }
+    }
+
+  }
+}
+
+void
orc_compiler_assign_rules (OrcCompiler *compiler)
{
int i;
return i;
}
+/*
+ * Set up a new temporary variable of the given element size and return its
+ * index in compiler->vars.
+ *
+ * The slot used is ORC_VAR_T1 plus the number of temporaries and duplicated
+ * variables counted so far; compiler->n_dup_vars is incremented.  The
+ * variable is named "tmp<index>" in a heap-allocated string.
+ *
+ * If the name cannot be allocated, the variable's name is left NULL and an
+ * error is reported through ORC_COMPILER_ERROR.
+ *
+ * NOTE(review): the index is not checked against the size of
+ * compiler->vars; the limit is not visible from this function.
+ */
+int
+orc_compiler_new_temporary (OrcCompiler *compiler, int size)
+{
+  /* "tmp" + at most 11 characters of decimal int + terminating NUL */
+  enum { TMP_NAME_LEN = 16 };
+  int i = ORC_VAR_T1 + compiler->n_temp_vars + compiler->n_dup_vars;
+  char *name;
+
+  name = malloc (TMP_NAME_LEN);
+  if (name == NULL) {
+    ORC_COMPILER_ERROR(compiler, "out of memory naming temporary %d", i);
+  } else {
+    snprintf (name, TMP_NAME_LEN, "tmp%d", i);
+  }
+
+  compiler->vars[i].vartype = ORC_VAR_TYPE_TEMP;
+  compiler->vars[i].size = size;
+  compiler->vars[i].name = name;
+  compiler->n_dup_vars++;
+
+  return i;
+}
+
void
orc_compiler_dump_asm (OrcCompiler *compiler)
{
}
+/*
+ * Emulation of the "loadb" opcode: copies n 8-bit elements, in order, from
+ * ex->src_ptrs[0] to ex->dest_ptrs[0].
+ */
void
+emulate_loadb (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_int8 *in = (orc_int8 *)ex->src_ptrs[0];
+  orc_int8 *out = (orc_int8 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* 0: loadb */
+    out[k] = in[k];
+  }
+}
+
+void
emulate_maxsb (OrcOpcodeExecutor *ex, int n)
{
int i;
}
+/*
+ * Emulation of the "storeb" opcode: copies n 8-bit elements, in order, from
+ * ex->src_ptrs[0] to ex->dest_ptrs[0].
+ */
void
+emulate_storeb (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_int8 *in = (orc_int8 *)ex->src_ptrs[0];
+  orc_int8 *out = (orc_int8 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* 0: storeb */
+    out[k] = in[k];
+  }
+}
+
+void
emulate_subb (OrcOpcodeExecutor *ex, int n)
{
int i;
}
+/*
+ * Emulation of the "loadw" opcode: copies n 16-bit elements, in order, from
+ * ex->src_ptrs[0] to ex->dest_ptrs[0].
+ */
void
+emulate_loadw (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_int16 *in = (orc_int16 *)ex->src_ptrs[0];
+  orc_int16 *out = (orc_int16 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* 0: loadw */
+    out[k] = in[k];
+  }
+}
+
+void
emulate_maxsw (OrcOpcodeExecutor *ex, int n)
{
int i;
}
+/*
+ * Emulation of the "storew" opcode: copies n 16-bit elements, in order, from
+ * ex->src_ptrs[0] to ex->dest_ptrs[0].
+ */
void
+emulate_storew (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_int16 *in = (orc_int16 *)ex->src_ptrs[0];
+  orc_int16 *out = (orc_int16 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    /* 0: storew */
+    out[k] = in[k];
+  }
+}
+
+void
emulate_subw (OrcOpcodeExecutor *ex, int n)
{
int i;
}
+/*
+ * Emulation of the "loadl" opcode: for each of n elements, in order, takes
+ * the .i member of the orc_union32 at ex->src_ptrs[0] and writes a union
+ * carrying that value to ex->dest_ptrs[0].
+ */
void
+emulate_loadl (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_union32 *in = (orc_union32 *)ex->src_ptrs[0];
+  orc_union32 *out = (orc_union32 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    orc_union32 elem;
+
+    /* 0: loadl */
+    elem.i = in[k].i;
+    out[k] = elem;
+  }
+}
+
+void
emulate_maxsl (OrcOpcodeExecutor *ex, int n)
{
int i;
}
+/*
+ * Emulation of the "storel" opcode: for each of n elements, in order, takes
+ * the .i member of the orc_union32 at ex->src_ptrs[0] and writes a union
+ * carrying that value to ex->dest_ptrs[0].
+ */
void
+emulate_storel (OrcOpcodeExecutor *ex, int n)
+{
+  const orc_union32 *in = (orc_union32 *)ex->src_ptrs[0];
+  orc_union32 *out = (orc_union32 *)ex->dest_ptrs[0];
+  int k;
+
+  for (k = 0; k < n; k++) {
+    orc_union32 elem;
+
+    /* 0: storel */
+    elem.i = in[k].i;
+    out[k] = elem;
+  }
+}
+
+void
emulate_subl (OrcOpcodeExecutor *ex, int n)
{
int i;
void emulate_n16_convfl (OrcOpcodeExecutor *ex);
void emulate_convlf (OrcOpcodeExecutor *ex, int n);
void emulate_n16_convlf (OrcOpcodeExecutor *ex);
+void emulate_loadb (OrcOpcodeExecutor *ex, int n);
+void emulate_loadw (OrcOpcodeExecutor *ex, int n);
+void emulate_loadl (OrcOpcodeExecutor *ex, int n);
+void emulate_storeb (OrcOpcodeExecutor *ex, int n);
+void emulate_storew (OrcOpcodeExecutor *ex, int n);
+void emulate_storel (OrcOpcodeExecutor *ex, int n);
#ifndef DISABLE_ORC
#include <orc/orc.h>
#endif
-#include <math.h>
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
BINARY_SB(cmpeqb, (a == b) ? (~0) : 0)
BINARY_SB(cmpgtsb, (a > b) ? (~0) : 0)
UNARY_SB(copyb, a)
+UNARY_SB(loadb, a)
BINARY_SB(maxsb, (a > b) ? a : b)
BINARY_UB(maxub, (a > b) ? a : b)
BINARY_SB(minsb, (a < b) ? a : b)
BINARY_SB(shrsb, a >> b)
BINARY_UB(shrub, (a) >> b)
UNARY_SB(signb, ORC_CLAMP(a,-1,1))
+UNARY_SB(storeb, a)
BINARY_SB(subb, a - b)
BINARY_SB(subssb, ORC_CLAMP_SB(a - b))
BINARY_UB(subusb, ORC_CLAMP_UB(a - b))
BINARY_SW(cmpeqw, (a == b) ? (~0) : 0)
BINARY_SW(cmpgtsw, (a > b) ? (~0) : 0)
UNARY_SW(copyw, a)
+UNARY_SW(loadw, a)
BINARY_SW(maxsw, (a > b) ? a : b)
BINARY_UW(maxuw, (a > b) ? a : b)
BINARY_SW(minsw, (a < b) ? a : b)
BINARY_SW(shrsw, a >> b)
BINARY_UW(shruw, a >> b)
UNARY_SW(signw, ORC_CLAMP(a,-1,1))
+UNARY_SW(storew, a)
BINARY_SW(subw, a - b)
BINARY_SW(subssw, ORC_CLAMP_SW(a - b))
BINARY_UW(subusw, ORC_CLAMP_UW(a - b))
BINARY_SL(cmpeql, (a == b) ? (~0) : 0)
BINARY_SL(cmpgtsl, (a > b) ? (~0) : 0)
UNARY_SL(copyl, a)
+UNARY_SL(loadl, a)
BINARY_SL(maxsl, (a > b) ? a : b)
BINARY_UL(maxul, ((orc_uint32)a > (orc_uint32)b) ? a : b)
BINARY_SL(minsl, (a < b) ? a : b)
BINARY_SL(shrsl, a >> b)
BINARY_UL(shrul, ((orc_uint32)a) >> b)
UNARY_SL(signl, ORC_CLAMP(a,-1,1))
+UNARY_SL(storel, a)
BINARY_SL(subl, a - b)
BINARY_SL(subssl, ORC_CLAMP_SL((orc_int64)a - (orc_int64)b))
BINARY_UL(subusl, (((orc_uint32)a) < ((orc_uint32)b)) ? 0 : a - b)
{ "cmpeqb", cmpeqb, NULL, 0, { 1 }, { 1, 1 }, emulate_cmpeqb },
{ "cmpgtsb", cmpgtsb, NULL, 0, { 1 }, { 1, 1 }, emulate_cmpgtsb },
{ "copyb", copyb, NULL, 0, { 1 }, { 1 }, emulate_copyb },
+ { "loadb", loadb, NULL, ORC_STATIC_OPCODE_LOAD, { 1 }, { 1 }, emulate_loadb },
{ "maxsb", maxsb, NULL, 0, { 1 }, { 1, 1 }, emulate_maxsb },
{ "maxub", maxub, NULL, 0, { 1 }, { 1, 1 }, emulate_maxub },
{ "minsb", minsb, NULL, 0, { 1 }, { 1, 1 }, emulate_minsb },
{ "shrsb", shrsb, NULL, ORC_STATIC_OPCODE_SCALAR, { 1 }, { 1, 1 }, emulate_shrsb },
{ "shrub", shrub, NULL, ORC_STATIC_OPCODE_SCALAR, { 1 }, { 1, 1 }, emulate_shrub },
{ "signb", signb, NULL, 0, { 1 }, { 1 }, emulate_signb },
+ { "storeb", storeb, NULL, ORC_STATIC_OPCODE_STORE, { 1 }, { 1 }, emulate_storeb },
{ "subb", subb, NULL, 0, { 1 }, { 1, 1 }, emulate_subb },
{ "subssb", subssb, NULL, 0, { 1 }, { 1, 1 }, emulate_subssb },
{ "subusb", subusb, NULL, 0, { 1 }, { 1, 1 }, emulate_subusb },
{ "cmpeqw", cmpeqw, NULL, 0, { 2 }, { 2, 2 }, emulate_cmpeqw },
{ "cmpgtsw", cmpgtsw, NULL, 0, { 2 }, { 2, 2 }, emulate_cmpgtsw },
{ "copyw", copyw, NULL, 0, { 2 }, { 2 }, emulate_copyw },
+ { "loadw", loadw, NULL, ORC_STATIC_OPCODE_LOAD, { 2 }, { 2 }, emulate_loadw },
{ "maxsw", maxsw, NULL, 0, { 2 }, { 2, 2 }, emulate_maxsw },
{ "maxuw", maxuw, NULL, 0, { 2 }, { 2, 2 }, emulate_maxuw },
{ "minsw", minsw, NULL, 0, { 2 }, { 2, 2 }, emulate_minsw },
{ "shrsw", shrsw, NULL, ORC_STATIC_OPCODE_SCALAR, { 2 }, { 2, 2 }, emulate_shrsw },
{ "shruw", shruw, NULL, ORC_STATIC_OPCODE_SCALAR, { 2 }, { 2, 2 }, emulate_shruw },
{ "signw", signw, NULL, 0, { 2 }, { 2 }, emulate_signw },
+ { "storew", storew, NULL, ORC_STATIC_OPCODE_STORE, { 2 }, { 2 }, emulate_storew },
{ "subw", subw, NULL, 0, { 2 }, { 2, 2 }, emulate_subw },
{ "subssw", subssw, NULL, 0, { 2 }, { 2, 2 }, emulate_subssw },
{ "subusw", subusw, NULL, 0, { 2 }, { 2, 2 }, emulate_subusw },
{ "cmpeql", cmpeql, NULL, 0, { 4 }, { 4, 4 }, emulate_cmpeql },
{ "cmpgtsl", cmpgtsl, NULL, 0, { 4 }, { 4, 4 }, emulate_cmpgtsl },
{ "copyl", copyl, NULL, 0, { 4 }, { 4 }, emulate_copyl },
+ { "loadl", loadl, NULL, ORC_STATIC_OPCODE_LOAD, { 4 }, { 4 }, emulate_loadl },
{ "maxsl", maxsl, NULL, 0, { 4 }, { 4, 4 }, emulate_maxsl },
{ "maxul", maxul, NULL, 0, { 4 }, { 4, 4 }, emulate_maxul },
{ "minsl", minsl, NULL, 0, { 4 }, { 4, 4 }, emulate_minsl },
{ "shrsl", shrsl, NULL, ORC_STATIC_OPCODE_SCALAR, { 4 }, { 4, 4 }, emulate_shrsl },
{ "shrul", shrul, NULL, ORC_STATIC_OPCODE_SCALAR, { 4 }, { 4, 4 }, emulate_shrul },
{ "signl", signl, NULL, 0, { 4 }, { 4 }, emulate_signl },
+ { "storel", storel, NULL, ORC_STATIC_OPCODE_STORE, { 4 }, { 4 }, emulate_storel },
{ "subl", subl, NULL, 0, { 4 }, { 4, 4 }, emulate_subl },
{ "subssl", subssl, NULL, 0, { 4 }, { 4, 4 }, emulate_subssl },
{ "subusl", subusl, NULL, 0, { 4 }, { 4, 4 }, emulate_subusl },
int n_left = compiler->program->constant_n;
int save_loop_shift;
int loop_shift;
- int offset = 0;
+
+ compiler->offset = 0;
save_loop_shift = compiler->loop_shift;
while (n_left >= (1<<compiler->loop_shift)) {
ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift);
- orc_sse_emit_loop (compiler, offset, 0);
+ orc_sse_emit_loop (compiler, compiler->offset, 0);
n_left -= 1<<compiler->loop_shift;
- offset += 1<<compiler->loop_shift;
+ compiler->offset += 1<<compiler->loop_shift;
}
for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) {
if (n_left >= (1<<loop_shift)) {
compiler->loop_shift = loop_shift;
ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", loop_shift);
- orc_sse_emit_loop (compiler, offset, 0);
+ orc_sse_emit_loop (compiler, compiler->offset, 0);
n_left -= 1<<loop_shift;
- offset += 1<<loop_shift;
+ compiler->offset += 1<<loop_shift;
}
}
compiler->loop_shift = save_loop_shift;
orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START);
ui_max = 1<<compiler->unroll_shift;
for(ui=0;ui<ui_max;ui++) {
- orc_sse_emit_loop (compiler, ui<<compiler->loop_shift,
+ compiler->offset = ui<<compiler->loop_shift;
+ orc_sse_emit_loop (compiler, compiler->offset,
(ui==ui_max-1) << (compiler->loop_shift + compiler->unroll_shift));
}
+ compiler->offset = 0;
if (compiler->loop_counter != ORC_REG_INVALID) {
orc_x86_emit_add_imm_reg (compiler, 4, -1, compiler->loop_counter, TRUE);
} else {
ORC_ASM_CODE(compiler,"\n");
#endif
- for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++){
- OrcVariable *var = compiler->vars + insn->src_args[k];
-
- if (opcode->src_size[k] == 0) continue;
-
- switch (var->vartype) {
- case ORC_VAR_TYPE_SRC:
- case ORC_VAR_TYPE_DEST:
- orc_sse_emit_load_src (compiler, var, offset*var->size);
- break;
- case ORC_VAR_TYPE_CONST:
- break;
- case ORC_VAR_TYPE_PARAM:
- break;
- case ORC_VAR_TYPE_TEMP:
- break;
- default:
- break;
- }
- }
-
rule = insn->rule;
if (rule && rule->emit) {
if (!(insn->opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) &&
} else {
ORC_COMPILER_ERROR(compiler,"No rule for: %s", opcode->name);
}
-
- for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++){
- OrcVariable *var = compiler->vars + insn->dest_args[k];
-
- if (opcode->dest_size[k] == 0) continue;
-
- switch (var->vartype) {
- case ORC_VAR_TYPE_DEST:
- orc_sse_emit_store_dest (compiler, var, offset*var->size);
- break;
- case ORC_VAR_TYPE_TEMP:
- break;
- default:
- break;
- }
- }
}
if (update) {
#define ORC_STATIC_OPCODE_FLOAT_DEST (1<<2)
#define ORC_STATIC_OPCODE_FLOAT (ORC_STATIC_OPCODE_FLOAT_SRC|ORC_STATIC_OPCODE_FLOAT_DEST)
#define ORC_STATIC_OPCODE_SCALAR (1<<3)
+#define ORC_STATIC_OPCODE_LOAD (1<<4)
+#define ORC_STATIC_OPCODE_STORE (1<<5)
struct _OrcStaticOpcode {
int alloc_loop_counter;
int loop_counter;
int size_region;
+
+ int offset;
};
#define ORC_SRC_ARG(p,i,n) ((p)->vars[(i)->src_args[(n)]].alloc)
}
+/*
+ * SSE rule shared by loadb/loadw/loadl.
+ *
+ * The memory offset used is compiler->offset scaled by the source element
+ * size.  When the source variable has no pointer register (ptr_register is
+ * 0), a move of the executor's arrays[] entry for that variable into
+ * gp_tmpreg is emitted first and gp_tmpreg is used as the base.
+ *
+ * The access width is src->size << compiler->loop_shift bytes: widths 1 and
+ * 2 go through gp_tmpreg and are then moved to dest->alloc; widths 4, 8 and
+ * 16 are moved from memory to dest->alloc directly.  Any other width is
+ * reported as a compiler error.
+ */
static void
+sse_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int offset = compiler->offset * src->size;
+  int ptr_reg = src->ptr_register;
+
+  if (ptr_reg == 0) {
+    int i = insn->src_args[0];
+
+    /* No dedicated register: fetch the array pointer from the executor */
+    orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
+        (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]),
+        compiler->exec_reg, compiler->gp_tmpreg);
+    ptr_reg = compiler->gp_tmpreg;
+  }
+
+  switch (src->size << compiler->loop_shift) {
+    case 1:
+    case 2:
+      orc_x86_emit_mov_memoffset_reg (compiler,
+          src->size << compiler->loop_shift, offset, ptr_reg,
+          compiler->gp_tmpreg);
+      orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc);
+      break;
+    case 4:
+    case 8:
+    case 16:
+      orc_x86_emit_mov_memoffset_sse (compiler,
+          src->size << compiler->loop_shift, offset, ptr_reg,
+          dest->alloc, src->is_aligned);
+      break;
+    default:
+      ORC_COMPILER_ERROR(compiler,"bad load size %d",
+          src->size << compiler->loop_shift);
+      break;
+  }
+}
+
+/*
+ * SSE rule shared by storeb/storew/storel.
+ *
+ * The memory offset used is compiler->offset scaled by the destination
+ * element size.  When the destination variable has no pointer register
+ * (ptr_register is 0), a move from dest->ptr_offset relative to exec_reg
+ * into gp_tmpreg is emitted first and gp_tmpreg is used as the base.
+ *
+ * The access width is dest->size << compiler->loop_shift bytes: widths 1
+ * and 2 move src->alloc into gp_tmpreg and store from there; widths 4, 8
+ * and 16 are stored from src->alloc directly.  Any other width is reported
+ * as a compiler error.
+ *
+ * Widths 1 and 2 need gp_tmpreg for the data, so they cannot be emitted
+ * when gp_tmpreg is also the base pointer.  That case is reported as
+ * "unimplemented" and nothing further is emitted, rather than producing a
+ * store through a base register that has just been overwritten.
+ */
+static void
+sse_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{
+  OrcVariable *src = compiler->vars + insn->src_args[0];
+  OrcVariable *dest = compiler->vars + insn->dest_args[0];
+  int offset;
+  int ptr_reg;
+
+  offset = compiler->offset * dest->size;
+  if (dest->ptr_register == 0) {
+    orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4,
+        dest->ptr_offset, compiler->exec_reg, compiler->gp_tmpreg);
+    ptr_reg = compiler->gp_tmpreg;
+  } else {
+    ptr_reg = dest->ptr_register;
+  }
+  switch (dest->size << compiler->loop_shift) {
+    case 1:
+    case 2:
+      /* FIXME we might be using ecx twice here */
+      if (ptr_reg == compiler->gp_tmpreg) {
+        ORC_COMPILER_ERROR(compiler,"unimplemented");
+        break;
+      }
+      orc_x86_emit_mov_sse_reg (compiler, src->alloc, compiler->gp_tmpreg);
+      orc_x86_emit_mov_reg_memoffset (compiler,
+          dest->size << compiler->loop_shift, compiler->gp_tmpreg, offset,
+          ptr_reg);
+      break;
+    case 4:
+    case 8:
+    case 16:
+      orc_x86_emit_mov_sse_memoffset (compiler,
+          dest->size << compiler->loop_shift, src->alloc, offset, ptr_reg,
+          dest->is_aligned, dest->is_uncached);
+      break;
+    default:
+      ORC_COMPILER_ERROR(compiler,"bad size");
+      break;
+  }
+}
+
+static void
sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->vars[insn->src_args[0]].alloc == p->vars[insn->dest_args[0]].alloc) {
rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target,
ORC_TARGET_SSE_SSE2);
+ orc_rule_register (rule_set, "loadb", sse_rule_loadX, NULL);
+ orc_rule_register (rule_set, "loadw", sse_rule_loadX, NULL);
+ orc_rule_register (rule_set, "loadl", sse_rule_loadX, NULL);
+
+ orc_rule_register (rule_set, "storeb", sse_rule_storeX, NULL);
+ orc_rule_register (rule_set, "storew", sse_rule_storeX, NULL);
+ orc_rule_register (rule_set, "storel", sse_rule_storeX, NULL);
+
REG(addb);
REG(addssb);
REG(addusb);