From d12f1deab45fbe66f9550d92f1e02c757ed9d393 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Fri, 6 Aug 2010 17:50:39 -0700 Subject: [PATCH] Add some 64-bit opcodes, and SSE rules --- orc/opcodes.h | 15 +++- orc/orccompiler.c | 92 ++++++++++++++------ orc/orcemulateopcodes.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++ orc/orcemulateopcodes.h | 10 +++ orc/orcexecutor.c | 37 +++++--- orc/orcopcodes.c | 20 +++-- orc/orcparse.c | 38 ++++++--- orc/orcprogram-c.c | 20 +++++ orc/orcprogram.c | 50 +++++++++++ orc/orcprogram.h | 20 ++++- orc/orcrules-sse.c | 48 +++++++++++ 11 files changed, 511 insertions(+), 59 deletions(-) diff --git a/orc/opcodes.h b/orc/opcodes.h index 694b891..4db81a8 100644 --- a/orc/opcodes.h +++ b/orc/opcodes.h @@ -85,21 +85,32 @@ BINARY_SL(subssl, "ORC_CLAMP_SL((orc_int64)%s - (orc_int64)%s)") BINARY_UL(subusl, "ORC_CLAMP_UL((orc_int64)(orc_uint32)%s - (orc_int64)(orc_uint32)%s)") BINARY_SL(xorl, "%s ^ %s") +UNARY_SQ(loadpq, "%s") + UNARY_BW(convsbw, "%s") UNARY_BW(convubw, "(orc_uint8)%s") -UNARY_WL(convswl, "%s") -UNARY_WL(convuwl, "(orc_uint16)%s") UNARY_WB(convwb, "%s") UNARY_WB(convssswb, "ORC_CLAMP_SB(%s)") UNARY_WB(convsuswb, "ORC_CLAMP_UB(%s)") UNARY_WB(convusswb, "ORC_CLAMP_SB((orc_uint16)%s)") UNARY_WB(convuuswb, "ORC_CLAMP_UB((orc_uint16)%s)") + +UNARY_WL(convswl, "%s") +UNARY_WL(convuwl, "(orc_uint16)%s") UNARY_LW(convlw, "%s") UNARY_LW(convssslw, "ORC_CLAMP_SW(%s)") UNARY_LW(convsuslw, "ORC_CLAMP_UW(%s)") UNARY_LW(convusslw, "ORC_CLAMP_SW((orc_uint32)%s)") UNARY_LW(convuuslw, "ORC_CLAMP_UW((orc_uint32)%s)") +UNARY_LQ(convslq, "%s") +UNARY_LQ(convulq, "(orc_uint32)%s") +UNARY_LW(convql, "%s") +UNARY_LW(convsssql, "ORC_CLAMP_SL(%s)") +UNARY_LW(convsusql, "ORC_CLAMP_UL(%s)") +UNARY_LW(convussql, "ORC_CLAMP_SL((orc_uint64)%s)") +UNARY_LW(convuusql, "ORC_CLAMP_UL((orc_uint64)%s)") + BINARY_BW(mulsbw, "%s * %s") BINARY_BW(mulubw, "(orc_uint8)%s * (orc_uint8)%s") BINARY_WL(mulswl, "%s * %s") diff --git a/orc/orccompiler.c 
b/orc/orccompiler.c index 7115785..a4e41c9 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -355,25 +355,34 @@ orc_compiler_check_sizes (OrcCompiler *compiler) for(i=0;in_insns;i++) { OrcInstruction *insn = compiler->insns + i; OrcStaticOpcode *opcode = insn->opcode; + int multiplier = 1; + + if (insn->flags & ORC_INSTRUCTION_FLAG_X2) { + multiplier = 2; + } else if (insn->flags & ORC_INSTRUCTION_FLAG_X4) { + multiplier = 4; + } for(j=0;jdest_size[j] == 0) continue; - if (opcode->dest_size[j] != compiler->vars[insn->dest_args[j]].size) { + if (multiplier * opcode->dest_size[j] != + compiler->vars[insn->dest_args[j]].size) { ORC_COMPILER_ERROR(compiler, "size mismatch, opcode %s dest[%d] is %d should be %d", opcode->name, j, compiler->vars[insn->dest_args[j]].size, - opcode->dest_size[j]); + multiplier * opcode->dest_size[j]); compiler->result = ORC_COMPILE_RESULT_UNKNOWN_PARSE; return; } } for(j=0;jsrc_size[j] == 0) continue; - if (opcode->src_size[j] != compiler->vars[insn->src_args[j]].size && + if (multiplier * opcode->src_size[j] != + compiler->vars[insn->src_args[j]].size && compiler->vars[insn->src_args[j]].vartype != ORC_VAR_TYPE_PARAM && compiler->vars[insn->src_args[j]].vartype != ORC_VAR_TYPE_CONST) { ORC_COMPILER_ERROR(compiler, "size mismatch, opcode %s src[%d] is %d should be %d", opcode->name, j, compiler->vars[insn->src_args[j]].size, - opcode->src_size[j]); + multiplier * opcode->src_size[j]); compiler->result = ORC_COMPILE_RESULT_UNKNOWN_PARSE; return; } @@ -390,6 +399,57 @@ orc_compiler_check_sizes (OrcCompiler *compiler) } } +static OrcStaticOpcode * +get_load_opcode_for_size (int size) +{ + switch (size) { + case 1: + return orc_opcode_find_by_name ("loadb"); + case 2: + return orc_opcode_find_by_name ("loadw"); + case 4: + return orc_opcode_find_by_name ("loadl"); + case 8: + return orc_opcode_find_by_name ("loadq"); + default: + ORC_ASSERT(0); + } +} + +static OrcStaticOpcode * +get_loadp_opcode_for_size (int size) +{ + switch (size) 
{ + case 1: + return orc_opcode_find_by_name ("loadpb"); + case 2: + return orc_opcode_find_by_name ("loadpw"); + case 4: + return orc_opcode_find_by_name ("loadpl"); + case 8: + return orc_opcode_find_by_name ("loadpq"); + default: + ORC_ASSERT(0); + } +} + +static OrcStaticOpcode * +get_store_opcode_for_size (int size) +{ + switch (size) { + case 1: + return orc_opcode_find_by_name ("storeb"); + case 2: + return orc_opcode_find_by_name ("storew"); + case 4: + return orc_opcode_find_by_name ("storel"); + case 8: + return orc_opcode_find_by_name ("storeq"); + default: + ORC_ASSERT(0); + } +} + void orc_compiler_rewrite_insns (OrcCompiler *compiler) { @@ -422,13 +482,7 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) compiler->insn_flags[compiler->n_insns] |= ORC_INSN_FLAG_ADDED; compiler->n_insns++; - if (var->size == 1) { - cinsn->opcode = orc_opcode_find_by_name ("loadb"); - } else if (var->size == 2) { - cinsn->opcode = orc_opcode_find_by_name ("loadw"); - } else { - cinsn->opcode = orc_opcode_find_by_name ("loadl"); - } + cinsn->opcode = get_load_opcode_for_size (var->size); cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size); cinsn->src_args[0] = insn.src_args[i]; insn.src_args[i] = cinsn->dest_args[0]; @@ -440,13 +494,7 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) compiler->insn_flags[compiler->n_insns] |= ORC_INSN_FLAG_ADDED; compiler->n_insns++; - if (var->size == 1) { - cinsn->opcode = orc_opcode_find_by_name ("loadpb"); - } else if (var->size == 2) { - cinsn->opcode = orc_opcode_find_by_name ("loadpw"); - } else { - cinsn->opcode = orc_opcode_find_by_name ("loadpl"); - } + cinsn->opcode = get_loadp_opcode_for_size (var->size); cinsn->dest_args[0] = orc_compiler_new_temporary (compiler, var->size); cinsn->src_args[0] = insn.src_args[i]; insn.src_args[i] = cinsn->dest_args[0]; @@ -472,13 +520,7 @@ orc_compiler_rewrite_insns (OrcCompiler *compiler) compiler->insn_flags[compiler->n_insns] |= ORC_INSN_FLAG_ADDED; 
compiler->n_insns++; - if (var->size == 1) { - cinsn->opcode = orc_opcode_find_by_name ("storeb"); - } else if (var->size == 2) { - cinsn->opcode = orc_opcode_find_by_name ("storew"); - } else { - cinsn->opcode = orc_opcode_find_by_name ("storel"); - } + cinsn->opcode = get_store_opcode_for_size (var->size); cinsn->src_args[0] = orc_compiler_new_temporary (compiler, var->size); cinsn->dest_args[0] = xinsn->dest_args[i]; xinsn->dest_args[i] = cinsn->src_args[0]; diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c index f4ec60f..5865d0d 100644 --- a/orc/orcemulateopcodes.c +++ b/orc/orcemulateopcodes.c @@ -2509,6 +2509,65 @@ emulate_xorl (OrcOpcodeExecutor *ex, int offset, int n) } void +emulate_loadq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[offset + i]; + /* 1: storeq */ + ptr0[i] = var32; + } + +} + +void +emulate_loadpq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ptr0; + const int var24 = ((orc_union32 *)(ex->src_ptrs[0]))->i; + orc_union64 var32; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadpq */ + var32.i = var24; + /* 1: storeq */ + ptr0[i] = var32; + } + +} + +void +emulate_storeq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: storeq */ + ptr0[offset + i] = var32; + } + +} + +void emulate_convsbw (OrcOpcodeExecutor *ex, int offset, int n) { int i; @@ -2601,6 +2660,52 @@ emulate_convuwl (OrcOpcodeExecutor *ex, int offset, int n) } void +emulate_convslq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * 
ptr0; + const orc_union32 * ptr4; + orc_union32 var32; + orc_union64 var33; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union32 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var32 = ptr4[i]; + /* 1: convslq */ + var33.i = var32.i; + /* 2: storeq */ + ptr0[i] = var33; + } + +} + +void +emulate_convulq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ptr0; + const orc_union32 * ptr4; + orc_union32 var32; + orc_union64 var33; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union32 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var32 = ptr4[i]; + /* 1: convulq */ + var33.i = (orc_uint32)var32.i; + /* 2: storeq */ + ptr0[i] = var33; + } + +} + +void emulate_convwb (OrcOpcodeExecutor *ex, int offset, int n) { int i; @@ -2831,6 +2936,121 @@ emulate_convuuslw (OrcOpcodeExecutor *ex, int offset, int n) } void +emulate_convql (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + orc_union32 var33; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: convql */ + var33.i = var32.i; + /* 2: storel */ + ptr0[i] = var33; + } + +} + +void +emulate_convsssql (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + orc_union32 var33; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: convsssql */ + var33.i = ORC_CLAMP_SL(var32.i); + /* 2: storel */ + ptr0[i] = var33; + } + +} + +void +emulate_convsusql (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + orc_union32 var33; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 
0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: convsusql */ + var33.i = ORC_CLAMP_UL(var32.i); + /* 2: storel */ + ptr0[i] = var33; + } + +} + +void +emulate_convussql (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + orc_union32 var33; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: convussql */ + var33.i = ORC_CLAMP_SL((orc_uint64)var32.i); + /* 2: storel */ + ptr0[i] = var33; + } + +} + +void +emulate_convuusql (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union32 * ptr0; + const orc_union64 * ptr4; + orc_union64 var32; + orc_union32 var33; + + ptr0 = (orc_union32 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: convuusql */ + var33.i = ORC_CLAMP_UL((orc_uint64)var32.i); + /* 2: storel */ + ptr0[i] = var33; + } + +} + +void emulate_mulsbw (OrcOpcodeExecutor *ex, int offset, int n) { int i; diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h index 726e652..49d2e2d 100644 --- a/orc/orcemulateopcodes.h +++ b/orc/orcemulateopcodes.h @@ -99,10 +99,15 @@ void emulate_subl (OrcOpcodeExecutor *ex, int i, int n); void emulate_subssl (OrcOpcodeExecutor *ex, int i, int n); void emulate_subusl (OrcOpcodeExecutor *ex, int i, int n); void emulate_xorl (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadq (OrcOpcodeExecutor *ex, int i, int n); +void emulate_loadpq (OrcOpcodeExecutor *ex, int i, int n); +void emulate_storeq (OrcOpcodeExecutor *ex, int i, int n); void emulate_convsbw (OrcOpcodeExecutor *ex, int i, int n); void emulate_convubw (OrcOpcodeExecutor *ex, int i, int n); void emulate_convswl (OrcOpcodeExecutor *ex, int i, int n); void emulate_convuwl (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convslq (OrcOpcodeExecutor *ex, int i, int n); +void 
emulate_convulq (OrcOpcodeExecutor *ex, int i, int n); void emulate_convwb (OrcOpcodeExecutor *ex, int i, int n); void emulate_convssswb (OrcOpcodeExecutor *ex, int i, int n); void emulate_convsuswb (OrcOpcodeExecutor *ex, int i, int n); @@ -113,6 +118,11 @@ void emulate_convssslw (OrcOpcodeExecutor *ex, int i, int n); void emulate_convsuslw (OrcOpcodeExecutor *ex, int i, int n); void emulate_convusslw (OrcOpcodeExecutor *ex, int i, int n); void emulate_convuuslw (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convql (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convsssql (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convsusql (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convussql (OrcOpcodeExecutor *ex, int i, int n); +void emulate_convuusql (OrcOpcodeExecutor *ex, int i, int n); void emulate_mulsbw (OrcOpcodeExecutor *ex, int i, int n); void emulate_mulubw (OrcOpcodeExecutor *ex, int i, int n); void emulate_mulswl (OrcOpcodeExecutor *ex, int i, int n); diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c index 1719a34..debe81b 100644 --- a/orc/orcexecutor.c +++ b/orc/orcexecutor.c @@ -14,6 +14,7 @@ * @short_description: Running Orc programs */ +#define CHUNK_SIZE 16 OrcExecutor * orc_executor_new (OrcProgram *program) @@ -127,7 +128,7 @@ load_constant (void *data, int size, int value) { int l; orc_int8 *d = data; - for(l=0;l<16;l++) { + for(l=0;lvars + i; if (var->size) { - tmpspace[i] = malloc(4 * 16); + tmpspace[i] = malloc(ORC_MAX_VAR_SIZE * CHUNK_SIZE); } } @@ -199,6 +210,13 @@ orc_executor_emulate (OrcExecutor *ex) opcode = insn->opcode; opcode_ex[j].emulateN = opcode->emulateN; + opcode_ex[j].shift = 0; + if (insn->flags & ORC_INSTRUCTION_FLAG_X2) { + opcode_ex[j].shift = 1; + } else if (insn->flags & ORC_INSTRUCTION_FLAG_X4) { + opcode_ex[j].shift = 2; + } + for(k=0;kvars + insn->src_args[k]; if (opcode->src_size[k] == 0) continue; @@ -272,15 +290,12 @@ orc_executor_emulate (OrcExecutor *ex) } } - for(i=0;in;i+=16){ + 
for(i=0;in;i+=CHUNK_SIZE){ for(j=0;jn_insns;j++){ - insn = code->insns + j; - opcode = insn->opcode; - - if (ex->n - i >= 16) { - opcode_ex[j].emulateN (opcode_ex + j, i, 16); + if (ex->n - i >= CHUNK_SIZE) { + opcode_ex[j].emulateN (opcode_ex + j, i, CHUNK_SIZE << opcode_ex[j].shift); } else { - opcode_ex[j].emulateN (opcode_ex + j, i, ex->n - i); + opcode_ex[j].emulateN (opcode_ex + j, i, (ex->n - i) << opcode_ex[j].shift); } } } diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index 299bf19..b31df2c 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -274,6 +274,12 @@ orc_opcode_find_by_name (const char *name) return NULL; } +void +emulate_null (OrcOpcodeExecutor *ex, int offset, int n) +{ + /* This is a placeholder for adding new opcodes */ +} + #include "orc/orcemulateopcodes.h" static OrcStaticOpcode opcodes[] = { @@ -379,14 +385,16 @@ static OrcStaticOpcode opcodes[] = { { "subusl", 0, { 4 }, { 4, 4 }, emulate_subusl }, { "xorl", 0, { 4 }, { 4, 4 }, emulate_xorl }, + { "loadq", ORC_STATIC_OPCODE_LOAD, { 8 }, { 8 }, emulate_loadq }, + { "loadpq", ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_SCALAR|ORC_STATIC_OPCODE_INVARIANT, { 8 }, { 8 }, emulate_loadpq }, + { "storeq", ORC_STATIC_OPCODE_STORE, { 8 }, { 8 }, emulate_storeq }, + { "convsbw", 0, { 2 }, { 1 }, emulate_convsbw }, { "convubw", 0, { 2 }, { 1 }, emulate_convubw }, { "convswl", 0, { 4 }, { 2 }, emulate_convswl }, { "convuwl", 0, { 4 }, { 2 }, emulate_convuwl }, -#ifdef ENABLE_64BIT { "convslq", 0, { 8 }, { 4 }, emulate_convslq }, { "convulq", 0, { 8 }, { 4 }, emulate_convulq }, -#endif { "convwb", 0, { 1 }, { 2 }, emulate_convwb }, { "convssswb", 0, { 1 }, { 2 }, emulate_convssswb }, @@ -400,11 +408,11 @@ static OrcStaticOpcode opcodes[] = { { "convusslw", 0, { 2 }, { 4 }, emulate_convusslw }, { "convuuslw", 0, { 2 }, { 4 }, emulate_convuuslw }, -#ifdef ENABLE_64BIT { "convql", 0, { 4 }, { 8 }, emulate_convql }, - { "convssql", 0, { 4 }, { 8 }, emulate_convssql }, - { "convusql", 0, { 4 }, { 8 }, 
emulate_convusql }, -#endif + { "convsssql", 0, { 4 }, { 8 }, emulate_convsssql }, + { "convsusql", 0, { 4 }, { 8 }, emulate_convsusql }, + { "convussql", 0, { 4 }, { 8 }, emulate_convussql }, + { "convuusql", 0, { 4 }, { 8 }, emulate_convuusql }, { "mulsbw", 0, { 2 }, { 1, 1 }, emulate_mulsbw }, { "mulubw", 0, { 2 }, { 1, 1 }, emulate_mulubw }, diff --git a/orc/orcparse.c b/orc/orcparse.c index 688ec62..2f4d974 100644 --- a/orc/orcparse.c +++ b/orc/orcparse.c @@ -188,40 +188,50 @@ orc_parse_full (const char *code, OrcProgram ***programs, char **log) } } else { OrcStaticOpcode *o; + unsigned int flags = 0; + int offset = 0; + + if (strcmp (token[0], "x4") == 0) { + flags |= ORC_INSTRUCTION_FLAG_X4; + offset = 1; + } else if (strcmp (token[0], "x2") == 0) { + flags |= ORC_INSTRUCTION_FLAG_X2; + offset = 1; + } - o = get_opcode (parser, token[0]); + o = get_opcode (parser, token[offset]); if (o) { int n_args = opcode_n_args (o); - if (n_tokens != 1 + n_args) { + if (n_tokens != 1 + offset + n_args) { orc_parse_log (parser, "error: line %d: too %s arguments for %s (expected %d)\n", - parser->line_number, (n_tokens < 1+n_args) ? "few" : "many", - token[0], n_args); + parser->line_number, (n_tokens < 1+offset+n_args) ? 
"few" : "many", + token[offset], n_args); } - if (n_tokens == 4) { + if (n_tokens - offset == 4) { char *end; - int imm = strtol (token[3], &end, 0); - if (end != token[3]) { + int imm = strtol (token[offset + 3], &end, 0); + if (end != token[offset + 3]) { char creg[10]; sprintf(creg, "c%d", parser->creg_index); parser->creg_index++; orc_program_add_constant (parser->program, 2, imm, creg); - orc_program_append_str (parser->program, token[0], - token[1], token[2], creg); + orc_program_append_str_2 (parser->program, token[offset], flags, + token[offset+1], token[offset+2], creg, NULL); } else { - orc_program_append_str (parser->program, token[0], - token[1], token[2], token[3]); + orc_program_append_str_2 (parser->program, token[offset], flags, + token[offset+1], token[offset+2], token[offset+3], NULL); } } else { - orc_program_append_ds_str (parser->program, token[0], - token[1], token[2]); + orc_program_append_str_2 (parser->program, token[offset], flags, + token[offset+1], token[offset+2], NULL, NULL); } } else { orc_parse_log (parser, "error: line %d: unknown opcode: %s\n", parser->line_number, - token[0]); + token[offset]); } } } diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index b561ff1..9fce6ac 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -613,18 +613,24 @@ c_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \ #define BINARY_UW(a,b) BINARY(a,b) #define BINARY_SL(a,b) BINARY(a,b) #define BINARY_UL(a,b) BINARY(a,b) +#define BINARY_SQ(a,b) BINARY(a,b) +#define BINARY_UQ(a,b) BINARY(a,b) #define UNARY_SB(a,b) UNARY(a,b) #define UNARY_UB(a,b) UNARY(a,b) #define UNARY_SW(a,b) UNARY(a,b) #define UNARY_UW(a,b) UNARY(a,b) #define UNARY_SL(a,b) UNARY(a,b) #define UNARY_UL(a,b) UNARY(a,b) +#define UNARY_SQ(a,b) UNARY(a,b) +#define UNARY_UQ(a,b) UNARY(a,b) #define BINARY_BW(a,b) BINARY(a,b) #define BINARY_WL(a,b) BINARY(a,b) #define BINARY_LW(a,b) BINARY(a,b) #define BINARY_WB(a,b) BINARY(a,b) #define UNARY_BW(a,b) UNARY(a,b) 
#define UNARY_WL(a,b) UNARY(a,b) +#define UNARY_LQ(a,b) UNARY(a,b) +#define UNARY_QL(a,b) UNARY(a,b) #define UNARY_LW(a,b) UNARY(a,b) #define UNARY_WB(a,b) UNARY(a,b) @@ -642,6 +648,8 @@ c_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \ #undef BINARY_UW #undef BINARY_SL #undef BINARY_UL +#undef BINARY_SQ +#undef BINARY_UQ #undef BINARY_F #undef UNARY_SB #undef UNARY_UB @@ -649,6 +657,8 @@ c_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \ #undef UNARY_UW #undef UNARY_SL #undef UNARY_UL +#undef UNARY_SQ +#undef UNARY_UQ #undef UNARY_F #undef BINARY_BW #undef BINARY_WL @@ -656,6 +666,8 @@ c_rule_ ## name (OrcCompiler *p, void *user, OrcInstruction *insn) \ #undef BINARY_WB #undef UNARY_BW #undef UNARY_WL +#undef UNARY_LQ +#undef UNARY_QL #undef UNARY_LW #undef UNARY_WB #undef UNARY_FL @@ -798,6 +810,8 @@ orc_c_init (void) #define BINARY_UW(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define BINARY_SL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define BINARY_UL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define BINARY_SQ(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define BINARY_UQ(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define BINARY_F(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_SB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_UB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); @@ -805,6 +819,8 @@ orc_c_init (void) #define UNARY_UW(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_SL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_UL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define UNARY_SQ(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define UNARY_UQ(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_F(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); 
#define BINARY_BW(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define BINARY_WL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); @@ -812,6 +828,8 @@ orc_c_init (void) #define BINARY_WB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_BW(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_WL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define UNARY_LQ(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); +#define UNARY_QL(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_LW(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define UNARY_WB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); @@ -824,6 +842,7 @@ orc_c_init (void) orc_rule_register (rule_set, "loadb", c_rule_loadX, NULL); orc_rule_register (rule_set, "loadw", c_rule_loadX, NULL); orc_rule_register (rule_set, "loadl", c_rule_loadX, NULL); + orc_rule_register (rule_set, "loadq", c_rule_loadX, NULL); orc_rule_register (rule_set, "loadoffb", c_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadoffw", c_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadoffl", c_rule_loadoffX, NULL); @@ -832,6 +851,7 @@ orc_c_init (void) orc_rule_register (rule_set, "storeb", c_rule_storeX, NULL); orc_rule_register (rule_set, "storew", c_rule_storeX, NULL); orc_rule_register (rule_set, "storel", c_rule_storeX, NULL); + orc_rule_register (rule_set, "storeq", c_rule_storeX, NULL); orc_rule_register (rule_set, "accw", c_rule_accw, NULL); orc_rule_register (rule_set, "accl", c_rule_accl, NULL); diff --git a/orc/orcprogram.c b/orc/orcprogram.c index a037693..8f9dc28 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -508,6 +508,8 @@ orc_program_find_var_by_name (OrcProgram *program, const char *name) { int i; + if (name == NULL) return -1; + for(i=0;ivars[i].name && strcmp (program->vars[i].name, name) == 0) { return i; @@ -553,6 +555,54 @@ orc_program_append_str (OrcProgram 
*program, const char *name, } /** + * orc_program_append_str_2: + * @program: a pointer to an OrcProgram structure + * @name: name of instruction + * @flags: instruction flags + * @arg1: name of first variable + * @arg2: name of second variable + * @arg3: name of third variable + * @arg4: name of fourth variable + * + * Appends an instruction to the program, with arguments @arg1, + * @arg2, @arg3, and @arg4. + */ +void +orc_program_append_str_2 (OrcProgram *program, const char *name, + unsigned int flags, const char *arg1, const char *arg2, const char *arg3, + const char *arg4) +{ + OrcInstruction *insn; + int args[4]; + int i; + + insn = program->insns + program->n_insns; + + insn->opcode = orc_opcode_find_by_name (name); + if (!insn->opcode) { + ORC_ERROR ("unknown opcode: %s", name); + } + args[0] = orc_program_find_var_by_name (program, arg1); + args[1] = orc_program_find_var_by_name (program, arg2); + args[2] = orc_program_find_var_by_name (program, arg3); + args[3] = orc_program_find_var_by_name (program, arg4); + insn->flags = flags; + i = 0; + insn->dest_args[0] = args[i++]; + if (insn->opcode->dest_size[1] != 0) { + insn->dest_args[1] = args[i++]; + } + if (insn->opcode->src_size[0] != 0) { + insn->src_args[0] = args[i++]; + } + if (insn->opcode->src_size[1] != 0) { + insn->src_args[1] = args[i++]; + } + + program->n_insns++; +} + +/** * orc_program_append_ds_str: * @program: a pointer to an OrcProgram structure * @name: name of instruction diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 61aabcb..b251998 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -47,6 +47,8 @@ typedef void (*OrcExecutorFunc)(OrcExecutor *ex); #define ORC_N_TARGETS 10 #define ORC_N_RULE_SETS 10 +#define ORC_MAX_VAR_SIZE 8 + #define ORC_STRUCT_OFFSET(struct_type, member) \ ((long) ((unsigned int *) &((struct_type*) 0)->member)) @@ -292,8 +294,12 @@ struct _OrcInstruction { int src_args[ORC_STATIC_OPCODE_N_SRC]; OrcRule *rule; + unsigned int flags; }; +#define ORC_INSTRUCTION_FLAG_X2
(1<<0) +#define ORC_INSTRUCTION_FLAG_X4 (1<<1) + /** * OrcConstant: * @@ -327,7 +333,13 @@ struct _OrcFixup { */ struct _OrcProgram { /*< private >*/ - OrcInstruction insns[ORC_N_INSNS]; + struct { + OrcStaticOpcode *opcode; + int dest_args[ORC_STATIC_OPCODE_N_DEST]; + int src_args[ORC_STATIC_OPCODE_N_SRC]; + + OrcRule *rule; + } _unused[ORC_N_INSNS]; /* needed for ABI compatibility */ int n_insns; OrcVariable vars[ORC_N_VARIABLES]; @@ -345,6 +357,8 @@ struct _OrcProgram { void *code_exec; int code_size; + OrcInstruction insns[ORC_N_INSNS]; + void *backup_func; int is_2d; int constant_n; @@ -440,6 +454,7 @@ struct _OrcOpcodeExecutor { void *src_ptrs[ORC_STATIC_OPCODE_N_SRC]; void *dest_ptrs[ORC_STATIC_OPCODE_N_DEST]; + int shift; }; /** @@ -555,6 +570,9 @@ void orc_program_set_constant_m (OrcProgram *program, int m); void orc_program_append (OrcProgram *p, const char *opcode, int arg0, int arg1, int arg2); void orc_program_append_str (OrcProgram *p, const char *opcode, const char * arg0, const char * arg1, const char * arg2); +void orc_program_append_str_2 (OrcProgram *program, const char *name, + unsigned int flags, const char *arg1, const char *arg2, const char *arg3, + const char *arg4); void orc_program_append_ds (OrcProgram *program, const char *opcode, int arg0, int arg1); void orc_program_append_ds_str (OrcProgram *p, const char *opcode, diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 55289a7..0f514b1 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -864,6 +864,46 @@ sse_rule_convsuslw (OrcCompiler *p, void *user, OrcInstruction *insn) int dest = p->vars[insn->dest_args[0]].alloc; orc_sse_emit_packusdw (p, src, dest); + orc_sse_emit_pslldq (p, 32, dest); + orc_sse_emit_psrldq (p, 32, dest); +} + +static void +sse_rule_convslq (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + if (src != dest) { + 
orc_sse_emit_movdqa (p, src, dest); + } + orc_sse_emit_movdqa (p, src, tmp); + orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_punpckldq (p, tmp, dest); +} + +static void +sse_rule_convulq (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp; + + if (src != dest) { + orc_sse_emit_movdqa (p, src, dest); + } + tmp = orc_compiler_get_constant (p, 4, 0); + orc_sse_emit_punpckldq (p, tmp, dest); +} + +static void +sse_rule_convql (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + + orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,2,0), src, dest); } static void @@ -1909,6 +1949,7 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "loadb", sse_rule_loadX, NULL); orc_rule_register (rule_set, "loadw", sse_rule_loadX, NULL); orc_rule_register (rule_set, "loadl", sse_rule_loadX, NULL); + orc_rule_register (rule_set, "loadq", sse_rule_loadX, NULL); orc_rule_register (rule_set, "loadoffb", sse_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadoffw", sse_rule_loadoffX, NULL); orc_rule_register (rule_set, "loadoffl", sse_rule_loadoffX, NULL); @@ -1917,10 +1958,12 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "loadpb", sse_rule_loadpX, NULL); orc_rule_register (rule_set, "loadpw", sse_rule_loadpX, NULL); orc_rule_register (rule_set, "loadpl", sse_rule_loadpX, NULL); + orc_rule_register (rule_set, "loadpq", sse_rule_loadpX, NULL); orc_rule_register (rule_set, "storeb", sse_rule_storeX, NULL); orc_rule_register (rule_set, "storew", sse_rule_storeX, NULL); orc_rule_register (rule_set, "storel", sse_rule_storeX, NULL); + orc_rule_register (rule_set, "storeq", sse_rule_storeX, NULL); REG(addb); REG(addssb); @@ -1995,6 +2038,11 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, 
"convuwl", sse_rule_convuwl, NULL); orc_rule_register (rule_set, "convssslw", sse_rule_convssslw, NULL); + orc_rule_register (rule_set, "convql", sse_rule_convql, NULL); + orc_rule_register (rule_set, "convslq", sse_rule_convslq, NULL); + orc_rule_register (rule_set, "convulq", sse_rule_convulq, NULL); + //orc_rule_register (rule_set, "convsssql", sse_rule_convsssql, NULL); + orc_rule_register (rule_set, "mulsbw", sse_rule_mulsbw, NULL); orc_rule_register (rule_set, "mulubw", sse_rule_mulubw, NULL); orc_rule_register (rule_set, "mulswl", sse_rule_mulswl, NULL); -- 2.7.4