From 13b0eaf78cb5aaffd90887ffb9118cc1fb4d9982 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Wed, 28 Jul 2010 21:15:07 -0700 Subject: [PATCH] Add load/store opcodes Also rewrite programs to use them instead of having the backends separately handle load/stores. Breaks non-SSE backends. --- orc/opcodes.h | 6 +++ orc/orccompiler.c | 97 ++++++++++++++++++++++++++++++++++ orc/orcemulateopcodes.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++ orc/orcemulateopcodes.h | 6 +++ orc/orcfunctions.c | 1 - orc/orcopcodes.c | 12 +++++ orc/orcprogram-sse.c | 52 ++++-------------- orc/orcprogram.h | 4 ++ orc/orcrules-sse.c | 107 +++++++++++++++++++++++++++++++++++++ 9 files changed, 379 insertions(+), 44 deletions(-) diff --git a/orc/opcodes.h b/orc/opcodes.h index 15883ef..5f26be1 100644 --- a/orc/opcodes.h +++ b/orc/opcodes.h @@ -9,6 +9,7 @@ BINARY_UB(avgub, "((orc_uint8)%s + (orc_uint8)%s + 1)>>1") BINARY_SB(cmpeqb, "(%s == %s) ? (~0) : 0") BINARY_SB(cmpgtsb, "(%s > %s) ? (~0) : 0") UNARY_SB(copyb, "%s") +UNARY_SB(loadb, "%s") BINARY_SB(maxsb, "ORC_MAX(%s, %s)") BINARY_UB(maxub, "ORC_MAX((orc_uint8)%s, (orc_uint8)%s)") BINARY_SB(minsb, "ORC_MIN(%s, %s)") @@ -21,6 +22,7 @@ BINARY_SB(shlb, "%s << %s") BINARY_SB(shrsb, "%s >> %s") BINARY_UB(shrub, "((orc_uint8)%s) >> %s") UNARY_SB(signb, "ORC_CLAMP(%s,-1,1)") +UNARY_SB(storeb, "%s") BINARY_SB(subb, "%s - %s") BINARY_SB(subssb, "ORC_CLAMP_SB(%s - %s)") BINARY_UB(subusb, "ORC_CLAMP_UB((orc_uint8)%s - (orc_uint8)%s)") @@ -37,6 +39,7 @@ BINARY_UW(avguw, "((orc_uint16)%s + (orc_uint16)%s + 1)>>1") BINARY_SW(cmpeqw, "(%s == %s) ? (~0) : 0") BINARY_SW(cmpgtsw, "(%s > %s) ? (~0) : 0") UNARY_SW(copyw, "%s") +UNARY_SW(loadw, "%s") BINARY_SW(maxsw, "ORC_MAX(%s, %s)") BINARY_UW(maxuw, "ORC_MAX((orc_uint16)%s, (orc_uint16)%s)") BINARY_SW(minsw, "ORC_MIN(%s, %s)") @@ -49,6 +52,7 @@ BINARY_SW(shlw, "%s << %s") BINARY_SW(shrsw, "%s >> %s") BINARY_UW(shruw, "((orc_uint16)%s) >> %s") UNARY_SW(signw, "ORC_CLAMP(%s,-1,1)") +UNARY_SW(storew, "%s") BINARY_SW(subw, "%s - %s") BINARY_SW(subssw, "ORC_CLAMP_SW(%s - %s)") BINARY_UW(subusw, "ORC_CLAMP_UW((orc_uint16)%s - (orc_uint16)%s)") @@ -65,6 +69,7 @@ BINARY_UL(avgul, "((orc_uint64)(orc_uint32)%s + (orc_uint64)(orc_uint32)%s + 1)> BINARY_SL(cmpeql, "(%s == %s) ? (~0) : 0") BINARY_SL(cmpgtsl, "(%s > %s) ? (~0) : 0") UNARY_SL(copyl, "%s") +UNARY_SL(loadl, "%s") BINARY_SL(maxsl, "ORC_MAX(%s, %s)") BINARY_UL(maxul, "ORC_MAX((orc_uint32)%s, (orc_uint32)%s)") BINARY_SL(minsl, "ORC_MIN(%s, %s)") @@ -77,6 +82,7 @@ BINARY_SL(shll, "%s << %s") BINARY_SL(shrsl, "%s >> %s") BINARY_UL(shrul, "((orc_uint32)%s) >> %s") UNARY_SL(signl, "ORC_CLAMP(%s,-1,1)") +UNARY_SL(storel, "%s") BINARY_SL(subl, "%s - %s") BINARY_SL(subssl, "ORC_CLAMP_SL((orc_int64)%s - (orc_int64)%s)") BINARY_UL(subusl, "ORC_CLAMP_UL((orc_int64)(orc_uint32)%s - (orc_int64)(orc_uint32)%s)") diff --git a/orc/orccompiler.c b/orc/orccompiler.c index 16423b8..937e47a 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -30,9 +30,11 @@ void orc_compiler_assign_rules (OrcCompiler *compiler); void orc_compiler_global_reg_alloc (OrcCompiler *compiler); +void orc_compiler_rewrite_insns (OrcCompiler *compiler); void orc_compiler_rewrite_vars (OrcCompiler *compiler); void orc_compiler_rewrite_vars2 (OrcCompiler *compiler); int orc_compiler_dup_temporary (OrcCompiler *compiler, int var, int j); +int orc_compiler_new_temporary (OrcCompiler *compiler, int size); void orc_compiler_check_sizes (OrcCompiler *compiler); static char **_orc_compiler_flag_list; @@ -244,6 +246,9 @@ orc_program_compile_full (OrcProgram *program, OrcTarget *target, orc_compiler_check_sizes (compiler); if (compiler->error) goto error; + orc_compiler_rewrite_insns (compiler); + if (compiler->error) goto error; + orc_compiler_assign_rules (compiler); if (compiler->error) goto error; @@ -336,6 +341,84 @@ orc_compiler_check_sizes (OrcCompiler *compiler) } void +orc_compiler_rewrite_insns (OrcCompiler *compiler) +{ + int i; + int j; + OrcStaticOpcode *opcode; + OrcProgram *program = compiler->program; + + compiler->n_insns = 0; + for(j=0;jn_insns;j++){ + OrcInstruction insn; + OrcInstruction *xinsn; + + memcpy (&insn, program->insns + j, sizeof(OrcInstruction)); + opcode = insn.opcode; + + if (!(opcode->flags & ORC_STATIC_OPCODE_LOAD)) { + for(i=0;isrc_size[i] == 0) continue; + + var = compiler->vars + insn.src_args[i]; + if (var->vartype == ORC_VAR_TYPE_SRC || + var->vartype == ORC_VAR_TYPE_DEST) { + OrcInstruction *cinsn; + + cinsn = compiler->insns + compiler->n_insns; + compiler->n_insns++; + + if (var->size == 1) { + cinsn->opcode = orc_opcode_find_by_name ("loadb"); + } else if (var->size == 2) { + cinsn->opcode = orc_opcode_find_by_name ("loadw"); + } else { + cinsn->opcode = orc_opcode_find_by_name ("loadl"); + } + cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size); + cinsn->src_args[0] = insn.src_args[i]; + insn.src_args[i] = cinsn->dest_args[0]; + } + } + } + + xinsn = compiler->insns + compiler->n_insns; + memcpy (xinsn, &insn, sizeof(OrcInstruction)); + compiler->n_insns++; + + if (!(opcode->flags & ORC_STATIC_OPCODE_STORE)) { + for(i=0;idest_size[i] == 0) continue; + + var = compiler->vars + insn.dest_args[i]; + if (var->vartype == ORC_VAR_TYPE_DEST) { + OrcInstruction *cinsn; + + cinsn = compiler->insns + compiler->n_insns; + compiler->n_insns++; + + if (var->size == 1) { + cinsn->opcode = orc_opcode_find_by_name ("storeb"); + } else if (var->size == 2) { + cinsn->opcode = orc_opcode_find_by_name ("storew"); + } else { + cinsn->opcode = orc_opcode_find_by_name ("storel"); + } + cinsn->src_args[0] = orc_compiler_new_temporary (compiler, var->size); + cinsn->dest_args[0] = xinsn->dest_args[i]; + xinsn->dest_args[i] = cinsn->src_args[0]; + } + } + } + + } +} + +void orc_compiler_assign_rules (OrcCompiler *compiler) { int i; @@ -585,6 +668,20 @@ orc_compiler_dup_temporary (OrcCompiler *compiler, int var, int j) return i; } +int +orc_compiler_new_temporary (OrcCompiler *compiler, int size) +{ + int i = ORC_VAR_T1 + compiler->n_temp_vars + compiler->n_dup_vars; + + compiler->vars[i].vartype = ORC_VAR_TYPE_TEMP; + compiler->vars[i].size = size; + compiler->vars[i].name = malloc (10); + sprintf(compiler->vars[i].name, "tmp%d", i); + compiler->n_dup_vars++; + + return i; +} + void orc_compiler_dump_asm (OrcCompiler *compiler) { diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c index 7f4d9da..e654cef 100644 --- a/orc/orcemulateopcodes.c +++ b/orc/orcemulateopcodes.c @@ -340,6 +340,29 @@ emulate_copyb (OrcOpcodeExecutor *ex, int n) } void +emulate_loadb (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_int8 var0; + orc_int8 * ptr0; + orc_int8 var4; + const orc_int8 * ptr4; + + ptr0 = (orc_int8 *)ex->dest_ptrs[0]; + ptr4 = (orc_int8 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: loadb */ + var0 = var4; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_maxsb (OrcOpcodeExecutor *ex, int n) { int i; @@ -659,6 +682,29 @@ emulate_signb (OrcOpcodeExecutor *ex, int n) } void +emulate_storeb (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_int8 var0; + orc_int8 * ptr0; + orc_int8 var4; + const orc_int8 * ptr4; + + ptr0 = (orc_int8 *)ex->dest_ptrs[0]; + ptr4 = (orc_int8 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: storeb */ + var0 = var4; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_subb (OrcOpcodeExecutor *ex, int n) { int i; @@ -1069,6 +1115,29 @@ emulate_copyw (OrcOpcodeExecutor *ex, int n) } void +emulate_loadw (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_int16 var0; + orc_int16 * ptr0; + orc_int16 var4; + const orc_int16 * ptr4; + + ptr0 = (orc_int16 *)ex->dest_ptrs[0]; + ptr4 = (orc_int16 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: loadw */ + var0 = var4; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_maxsw (OrcOpcodeExecutor *ex, int n) { int i; @@ -1388,6 +1457,29 @@ emulate_signw (OrcOpcodeExecutor *ex, int n) } void +emulate_storew (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_int16 var0; + orc_int16 * ptr0; + orc_int16 var4; + const orc_int16 * ptr4; + + ptr0 = (orc_int16 *)ex->dest_ptrs[0]; + ptr4 = (orc_int16 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: storew */ + var0 = var4; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_subw (OrcOpcodeExecutor *ex, int n) { int i; @@ -1798,6 +1890,29 @@ emulate_copyl (OrcOpcodeExecutor *ex, int n) } void +emulate_loadl (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_union32 var0; + orc_union32 * ptr0; + orc_union32 var4; + const orc_union32 * ptr4; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union32 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: loadl */ + var0.i = var4.i; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_maxsl (OrcOpcodeExecutor *ex, int n) { int i; @@ -2117,6 +2232,29 @@ emulate_signl (OrcOpcodeExecutor *ex, int n) } void +emulate_storel (OrcOpcodeExecutor *ex, int n) +{ + int i; + orc_union32 var0; + orc_union32 * ptr0; + orc_union32 var4; + const orc_union32 * ptr4; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union32 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + /* 0: storel */ + var0.i = var4.i; + *ptr0 = var0; + ptr0++; + } + +} + +void emulate_subl (OrcOpcodeExecutor *ex, int n) { int i; diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h index 4d4a827..275431c 100644 --- a/orc/orcemulateopcodes.h +++ b/orc/orcemulateopcodes.h @@ -246,3 +246,9 @@ void emulate_convfl (OrcOpcodeExecutor *ex, int n); void emulate_n16_convfl (OrcOpcodeExecutor *ex); void emulate_convlf (OrcOpcodeExecutor *ex, int n); void emulate_n16_convlf (OrcOpcodeExecutor *ex); +void emulate_loadb (OrcOpcodeExecutor *ex, int n); +void emulate_loadw (OrcOpcodeExecutor *ex, int n); +void emulate_loadl (OrcOpcodeExecutor *ex, int n); +void emulate_storeb (OrcOpcodeExecutor *ex, int n); +void emulate_storew (OrcOpcodeExecutor *ex, int n); +void emulate_storel (OrcOpcodeExecutor *ex, int n); diff --git a/orc/orcfunctions.c b/orc/orcfunctions.c index 14c8ea1..825b2a2 100644 --- a/orc/orcfunctions.c +++ b/orc/orcfunctions.c @@ -7,7 +7,6 @@ #ifndef DISABLE_ORC #include #endif -#include #ifndef _ORC_INTEGER_TYPEDEFS_ #define _ORC_INTEGER_TYPEDEFS_ diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index ada3d02..8fd275e 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -396,6 +396,7 @@ BINARY_UB(avgub, (a + b + 1)>>1) BINARY_SB(cmpeqb, (a == b) ? (~0) : 0) BINARY_SB(cmpgtsb, (a > b) ? (~0) : 0) UNARY_SB(copyb, a) +UNARY_SB(loadb, a) BINARY_SB(maxsb, (a > b) ? a : b) BINARY_UB(maxub, (a > b) ? a : b) BINARY_SB(minsb, (a < b) ? a : b) @@ -408,6 +409,7 @@ BINARY_SB(shlb, a << b) BINARY_SB(shrsb, a >> b) BINARY_UB(shrub, (a) >> b) UNARY_SB(signb, ORC_CLAMP(a,-1,1)) +UNARY_SB(storeb, a) BINARY_SB(subb, a - b) BINARY_SB(subssb, ORC_CLAMP_SB(a - b)) BINARY_UB(subusb, ORC_CLAMP_UB(a - b)) @@ -424,6 +426,7 @@ BINARY_UW(avguw, (a + b + 1)>>1) BINARY_SW(cmpeqw, (a == b) ? (~0) : 0) BINARY_SW(cmpgtsw, (a > b) ? (~0) : 0) UNARY_SW(copyw, a) +UNARY_SW(loadw, a) BINARY_SW(maxsw, (a > b) ? a : b) BINARY_UW(maxuw, (a > b) ? a : b) BINARY_SW(minsw, (a < b) ? a : b) @@ -436,6 +439,7 @@ BINARY_SW(shlw, a << b) BINARY_SW(shrsw, a >> b) BINARY_UW(shruw, a >> b) UNARY_SW(signw, ORC_CLAMP(a,-1,1)) +UNARY_SW(storew, a) BINARY_SW(subw, a - b) BINARY_SW(subssw, ORC_CLAMP_SW(a - b)) BINARY_UW(subusw, ORC_CLAMP_UW(a - b)) @@ -452,6 +456,7 @@ BINARY_UL(avgul, ((orc_uint64)(orc_uint32)a + (orc_uint64)(orc_uint32)b + 1)>>1) BINARY_SL(cmpeql, (a == b) ? (~0) : 0) BINARY_SL(cmpgtsl, (a > b) ? (~0) : 0) UNARY_SL(copyl, a) +UNARY_SL(loadl, a) BINARY_SL(maxsl, (a > b) ? a : b) BINARY_UL(maxul, ((orc_uint32)a > (orc_uint32)b) ? a : b) BINARY_SL(minsl, (a < b) ? a : b) @@ -464,6 +469,7 @@ BINARY_SL(shll, a << b) BINARY_SL(shrsl, a >> b) BINARY_UL(shrul, ((orc_uint32)a) >> b) UNARY_SL(signl, ORC_CLAMP(a,-1,1)) +UNARY_SL(storel, a) BINARY_SL(subl, a - b) BINARY_SL(subssl, ORC_CLAMP_SL((orc_int64)a - (orc_int64)b)) BINARY_UL(subusl, (((orc_uint32)a) < ((orc_uint32)b)) ? 0 : a - b) @@ -712,6 +718,7 @@ static OrcStaticOpcode opcodes[] = { { "cmpeqb", cmpeqb, NULL, 0, { 1 }, { 1, 1 }, emulate_cmpeqb }, { "cmpgtsb", cmpgtsb, NULL, 0, { 1 }, { 1, 1 }, emulate_cmpgtsb }, { "copyb", copyb, NULL, 0, { 1 }, { 1 }, emulate_copyb }, + { "loadb", loadb, NULL, ORC_STATIC_OPCODE_LOAD, { 1 }, { 1 }, emulate_loadb }, { "maxsb", maxsb, NULL, 0, { 1 }, { 1, 1 }, emulate_maxsb }, { "maxub", maxub, NULL, 0, { 1 }, { 1, 1 }, emulate_maxub }, { "minsb", minsb, NULL, 0, { 1 }, { 1, 1 }, emulate_minsb }, @@ -724,6 +731,7 @@ static OrcStaticOpcode opcodes[] = { { "shrsb", shrsb, NULL, ORC_STATIC_OPCODE_SCALAR, { 1 }, { 1, 1 }, emulate_shrsb }, { "shrub", shrub, NULL, ORC_STATIC_OPCODE_SCALAR, { 1 }, { 1, 1 }, emulate_shrub }, { "signb", signb, NULL, 0, { 1 }, { 1 }, emulate_signb }, + { "storeb", storeb, NULL, ORC_STATIC_OPCODE_STORE, { 1 }, { 1 }, emulate_storeb }, { "subb", subb, NULL, 0, { 1 }, { 1, 1 }, emulate_subb }, { "subssb", subssb, NULL, 0, { 1 }, { 1, 1 }, emulate_subssb }, { "subusb", subusb, NULL, 0, { 1 }, { 1, 1 }, emulate_subusb }, @@ -741,6 +749,7 @@ static OrcStaticOpcode opcodes[] = { { "cmpeqw", cmpeqw, NULL, 0, { 2 }, { 2, 2 }, emulate_cmpeqw }, { "cmpgtsw", cmpgtsw, NULL, 0, { 2 }, { 2, 2 }, emulate_cmpgtsw }, { "copyw", copyw, NULL, 0, { 2 }, { 2 }, emulate_copyw }, + { "loadw", loadw, NULL, ORC_STATIC_OPCODE_LOAD, { 2 }, { 2 }, emulate_loadw }, { "maxsw", maxsw, NULL, 0, { 2 }, { 2, 2 }, emulate_maxsw }, { "maxuw", maxuw, NULL, 0, { 2 }, { 2, 2 }, emulate_maxuw }, { "minsw", minsw, NULL, 0, { 2 }, { 2, 2 }, emulate_minsw }, @@ -753,6 +762,7 @@ static OrcStaticOpcode opcodes[] = { { "shrsw", shrsw, NULL, ORC_STATIC_OPCODE_SCALAR, { 2 }, { 2, 2 }, emulate_shrsw }, { "shruw", shruw, NULL, ORC_STATIC_OPCODE_SCALAR, { 2 }, { 2, 2 }, emulate_shruw }, { "signw", signw, NULL, 0, { 2 }, { 2 }, emulate_signw }, + { "storew", storew, NULL, ORC_STATIC_OPCODE_STORE, { 2 }, { 2 }, emulate_storew }, { "subw", subw, NULL, 0, { 2 }, { 2, 2 }, emulate_subw }, { "subssw", subssw, NULL, 0, { 2 }, { 2, 2 }, emulate_subssw }, { "subusw", subusw, NULL, 0, { 2 }, { 2, 2 }, emulate_subusw }, @@ -770,6 +780,7 @@ static OrcStaticOpcode opcodes[] = { { "cmpeql", cmpeql, NULL, 0, { 4 }, { 4, 4 }, emulate_cmpeql }, { "cmpgtsl", cmpgtsl, NULL, 0, { 4 }, { 4, 4 }, emulate_cmpgtsl }, { "copyl", copyl, NULL, 0, { 4 }, { 4 }, emulate_copyl }, + { "loadl", loadl, NULL, ORC_STATIC_OPCODE_LOAD, { 4 }, { 4 }, emulate_loadl }, { "maxsl", maxsl, NULL, 0, { 4 }, { 4, 4 }, emulate_maxsl }, { "maxul", maxul, NULL, 0, { 4 }, { 4, 4 }, emulate_maxul }, { "minsl", minsl, NULL, 0, { 4 }, { 4, 4 }, emulate_minsl }, @@ -782,6 +793,7 @@ static OrcStaticOpcode opcodes[] = { { "shrsl", shrsl, NULL, ORC_STATIC_OPCODE_SCALAR, { 4 }, { 4, 4 }, emulate_shrsl }, { "shrul", shrul, NULL, ORC_STATIC_OPCODE_SCALAR, { 4 }, { 4, 4 }, emulate_shrul }, { "signl", signl, NULL, 0, { 4 }, { 4 }, emulate_signl }, + { "storel", storel, NULL, ORC_STATIC_OPCODE_STORE, { 4 }, { 4 }, emulate_storel }, { "subl", subl, NULL, 0, { 4 }, { 4, 4 }, emulate_subl }, { "subssl", subssl, NULL, 0, { 4 }, { 4, 4 }, emulate_subssl }, { "subusl", subusl, NULL, 0, { 4 }, { 4, 4 }, emulate_subusl }, diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 6e3e03b..0beba46 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -657,23 +657,24 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) int n_left = compiler->program->constant_n; int save_loop_shift; int loop_shift; - int offset = 0; + + compiler->offset = 0; save_loop_shift = compiler->loop_shift; while (n_left >= (1<loop_shift)) { ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - orc_sse_emit_loop (compiler, offset, 0); + orc_sse_emit_loop (compiler, compiler->offset, 0); n_left -= 1<loop_shift; - offset += 1<loop_shift; + compiler->offset += 1<loop_shift; } for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) { if (n_left >= (1<loop_shift = loop_shift; ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", loop_shift); - orc_sse_emit_loop (compiler, offset, 0); + orc_sse_emit_loop (compiler, compiler->offset, 0); n_left -= 1<offset += 1<loop_shift = save_loop_shift; @@ -719,9 +720,11 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) orc_x86_emit_label (compiler, LABEL_INNER_LOOP_START); ui_max = 1<unroll_shift; for(ui=0;uiloop_shift, + compiler->offset = ui<loop_shift; + orc_sse_emit_loop (compiler, compiler->offset, (ui==ui_max-1) << (compiler->loop_shift + compiler->unroll_shift)); } + compiler->offset = 0; if (compiler->loop_counter != ORC_REG_INVALID) { orc_x86_emit_add_imm_reg (compiler, 4, -1, compiler->loop_counter, TRUE); } else { @@ -801,27 +804,6 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) ORC_ASM_CODE(compiler,"\n"); #endif - for(k=0;kvars + insn->src_args[k]; - - if (opcode->src_size[k] == 0) continue; - - switch (var->vartype) { - case ORC_VAR_TYPE_SRC: - case ORC_VAR_TYPE_DEST: - orc_sse_emit_load_src (compiler, var, offset*var->size); - break; - case ORC_VAR_TYPE_CONST: - break; - case ORC_VAR_TYPE_PARAM: - break; - case ORC_VAR_TYPE_TEMP: - break; - default: - break; - } - } - rule = insn->rule; if (rule && rule->emit) { if (!(insn->opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) && @@ -835,22 +817,6 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) } else { ORC_COMPILER_ERROR(compiler,"No rule for: %s", opcode->name); } - - for(k=0;kvars + insn->dest_args[k]; - - if (opcode->dest_size[k] == 0) continue; - - switch (var->vartype) { - case ORC_VAR_TYPE_DEST: - orc_sse_emit_store_dest (compiler, var, offset*var->size); - break; - case ORC_VAR_TYPE_TEMP: - break; - default: - break; - } - } } if (update) { diff --git a/orc/orcprogram.h b/orc/orcprogram.h index a4b1a66..b5c06fb 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -263,6 +263,8 @@ struct _OrcOpcodeSet { #define ORC_STATIC_OPCODE_FLOAT_DEST (1<<2) #define ORC_STATIC_OPCODE_FLOAT (ORC_STATIC_OPCODE_FLOAT_SRC|ORC_STATIC_OPCODE_FLOAT_DEST) #define ORC_STATIC_OPCODE_SCALAR (1<<3) +#define ORC_STATIC_OPCODE_LOAD (1<<4) +#define ORC_STATIC_OPCODE_STORE (1<<5) struct _OrcStaticOpcode { @@ -402,6 +404,8 @@ struct _OrcCompiler { int alloc_loop_counter; int loop_counter; int size_region; + + int offset; }; #define ORC_SRC_ARG(p,i,n) ((p)->vars[(i)->src_args[(n)]].alloc) diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 93fd25e..b3751df 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -88,6 +88,105 @@ orc_sse_emit_loadpq (OrcCompiler *p, int reg, int param) } static void +sse_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn) +{ + OrcVariable *src = compiler->vars + insn->src_args[0]; + OrcVariable *dest = compiler->vars + insn->dest_args[0]; + int ptr_reg; + int offset = 0; + + offset = compiler->offset * src->size; + if (src->ptr_register == 0) { + int i = insn->src_args[0]; + orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), + compiler->exec_reg, compiler->gp_tmpreg); + ptr_reg = compiler->gp_tmpreg; + } else { + ptr_reg = src->ptr_register; + } + switch (src->size << compiler->loop_shift) { + case 1: + orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, + compiler->gp_tmpreg); + orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc); + break; + case 2: + orc_x86_emit_mov_memoffset_reg (compiler, 2, offset, ptr_reg, + compiler->gp_tmpreg); + orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, dest->alloc); + break; + case 4: + orc_x86_emit_mov_memoffset_sse (compiler, 4, offset, ptr_reg, + dest->alloc, src->is_aligned); + break; + case 8: + orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, ptr_reg, + dest->alloc, src->is_aligned); + break; + case 16: + orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, ptr_reg, + dest->alloc, src->is_aligned); + break; + default: + ORC_COMPILER_ERROR(compiler,"bad load size %d", + src->size << compiler->loop_shift); + break; + } +} + +static void +sse_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn) +{ + OrcVariable *src = compiler->vars + insn->src_args[0]; + OrcVariable *dest = compiler->vars + insn->dest_args[0]; + int offset; + int ptr_reg; + + offset = compiler->offset * dest->size; + if (dest->ptr_register == 0) { + orc_x86_emit_mov_memoffset_reg (compiler, compiler->is_64bit ? 8 : 4, + dest->ptr_offset, compiler->exec_reg, compiler->gp_tmpreg); + ptr_reg = compiler->gp_tmpreg; + } else { + ptr_reg = dest->ptr_register; + } + switch (dest->size << compiler->loop_shift) { + case 1: + /* FIXME we might be using ecx twice here */ + if (ptr_reg == compiler->gp_tmpreg) { + ORC_COMPILER_ERROR(compiler,"unimplemented"); + } + orc_x86_emit_mov_sse_reg (compiler, src->alloc, compiler->gp_tmpreg); + orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, offset, ptr_reg); + break; + case 2: + /* FIXME we might be using ecx twice here */ + if (ptr_reg == compiler->gp_tmpreg) { + ORC_COMPILER_ERROR(compiler,"unimplemented"); + } + orc_x86_emit_mov_sse_reg (compiler, src->alloc, compiler->gp_tmpreg); + orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, offset, ptr_reg); + break; + case 4: + orc_x86_emit_mov_sse_memoffset (compiler, 4, src->alloc, offset, ptr_reg, + dest->is_aligned, dest->is_uncached); + break; + case 8: + orc_x86_emit_mov_sse_memoffset (compiler, 8, src->alloc, offset, ptr_reg, + dest->is_aligned, dest->is_uncached); + break; + case 16: + orc_x86_emit_mov_sse_memoffset (compiler, 16, src->alloc, offset, ptr_reg, + dest->is_aligned, dest->is_uncached); + break; + default: + ORC_COMPILER_ERROR(compiler,"bad size"); + break; + } +} + +static void sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->vars[insn->src_args[0]].alloc == p->vars[insn->dest_args[0]].alloc) { @@ -1058,6 +1157,14 @@ orc_compiler_sse_register_rules (OrcTarget *target) rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, ORC_TARGET_SSE_SSE2); + orc_rule_register (rule_set, "loadb", sse_rule_loadX, NULL); + orc_rule_register (rule_set, "loadw", sse_rule_loadX, NULL); + orc_rule_register (rule_set, "loadl", sse_rule_loadX, NULL); + + orc_rule_register (rule_set, "storeb", sse_rule_storeX, NULL); + orc_rule_register (rule_set, "storew", sse_rule_storeX, NULL); + orc_rule_register (rule_set, "storel", sse_rule_storeX, NULL); + REG(addb); REG(addssb); REG(addusb); -- 2.7.4