From 6a0020dee070008bcbeb59b1c8e8e34e3056d769 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sun, 26 Apr 2009 18:53:42 -0700 Subject: [PATCH] Add accumulators, accum opcodes, and sse implementations --- orc/orccompiler.c | 18 +++- orc/orcexecutor.c | 14 +++ orc/orcopcodes.c | 237 +++++++++++++++++++++++++-------------------- orc/orcprogram-sse.c | 134 +++++++++++++++++++++++-- orc/orcprogram.c | 40 ++++++++ orc/orcprogram.h | 17 +++- orc/orcrules-sse.c | 84 +++++++++++++++- orc/x86.c | 24 +++++ orc/x86.h | 8 ++ testsuite/Makefile.am | 4 +- testsuite/test3.c | 24 ++++- testsuite/test_accsadubl.c | 87 +++++++++++++++++ testsuite/test_compile.c | 23 ++++- 13 files changed, 583 insertions(+), 131 deletions(-) create mode 100644 testsuite/test_accsadubl.c diff --git a/orc/orccompiler.c b/orc/orccompiler.c index 7d9175e..2bbd9b9 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -239,6 +239,15 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) if (compiler->vars[var].vartype == ORC_VAR_TYPE_PARAM) { ORC_PROGRAM_ERROR(compiler,"using param var as dest"); } + if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) { + if (compiler->vars[var].vartype != ORC_VAR_TYPE_ACCUMULATOR) { + ORC_PROGRAM_ERROR(compiler,"accumulating opcode to non-accumulator dest"); + } + } else { + if (compiler->vars[var].vartype == ORC_VAR_TYPE_ACCUMULATOR) { + ORC_PROGRAM_ERROR(compiler,"non-accumulating opcode to accumulator dest"); + } + } actual_var = var; if (compiler->vars[var].replaced) { @@ -294,7 +303,13 @@ orc_compiler_global_reg_alloc (OrcCompiler *compiler) case ORC_VAR_TYPE_DEST: var->ptr_register = orc_compiler_allocate_register (compiler, FALSE); break; + case ORC_VAR_TYPE_ACCUMULATOR: + var->first_use = -1; + var->last_use = -1; + var->alloc = orc_compiler_allocate_register (compiler, TRUE); + break; default: + ORC_PROGRAM_ERROR(compiler, "bad vartype"); break; } } @@ -341,9 +356,10 @@ orc_compiler_rewrite_vars2 (OrcCompiler *compiler) * - rule must handle it * - src1 must 
be last_use */ - if (1) { + if (!(compiler->insns[j].opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR)) { int src1 = compiler->insns[j].src_args[0]; int dest = compiler->insns[j].dest_args[0]; + if (compiler->vars[src1].last_use == j) { if (compiler->vars[src1].first_use == j) { k = orc_compiler_allocate_register (compiler, TRUE); diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c index 0f334db..668d3a4 100644 --- a/orc/orcexecutor.c +++ b/orc/orcexecutor.c @@ -69,6 +69,20 @@ orc_executor_set_param_str (OrcExecutor *ex, const char *name, int value) ex->params[var] = value; } +int +orc_executor_get_accumulator (OrcExecutor *ex, int var, int value) +{ + return ex->accumulators[var - ORC_VAR_A1]; /* var is a variable id; accumulators[] is indexed from ORC_VAR_A1 */ +} + +int +orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name, int value) +{ + int var; + var = orc_program_find_var_by_name (ex->program, name); + return ex->accumulators[var - ORC_VAR_A1]; /* same rebasing as orc_executor_get_accumulator */ +} + void orc_executor_set_n (OrcExecutor *ex, int n) { diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index c8c0a8c..1c58e5c 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -424,131 +424,154 @@ MUL(mulslq, int32_t, int64_t) MUL(mululq, uint32_t, uint64_t) #endif +#define ACC(name, type1) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + ex->dest_values[0] = ((type1)ex->src_values[0]); \ +} + +ACC(accw, int16_t) +ACC(accl, int32_t) + +static void +accsadubl (OrcOpcodeExecutor *ex, void *user) +{ + ex->dest_values[0] = abs((int)((uint8_t)ex->src_values[0]) - + (int)((uint8_t)ex->src_values[1])); +} + + static OrcStaticOpcode opcodes[] = { /* byte ops */ - { "absb", absb, NULL, { 1 }, { 1 } }, - { "addb", addb, NULL, { 1 }, { 1, 1 } }, - { "addssb", addssb, NULL, { 1 }, { 1, 1 } }, - { "addusb", addusb, NULL, { 1 }, { 1, 1 } }, - { "andb", andb, NULL, { 1 }, { 1, 1 } }, - { "andnb", andnb, NULL, { 1 }, { 1, 1 } }, - { "avgsb", avgsb, NULL, { 1 }, { 1, 1 } }, - { "avgub", avgub, NULL, { 1 }, { 1, 1 } }, - { "cmpeqb", cmpeqb, NULL, { 1 }, { 1, 1 } }, - { "cmpgtsb", cmpgtsb,
NULL, { 1 }, { 1, 1 } }, - { "copyb", copyb, NULL, { 1 }, { 1 } }, - { "maxsb", maxsb, NULL, { 1 }, { 1, 1 } }, - { "maxub", maxub, NULL, { 1 }, { 1, 1 } }, - { "minsb", minsb, NULL, { 1 }, { 1, 1 } }, - { "minub", minub, NULL, { 1 }, { 1, 1 } }, - { "mullb", mullb, NULL, { 1 }, { 1, 1 } }, - { "mulhsb", mulhsb, NULL, { 1 }, { 1, 1 } }, - { "mulhub", mulhub, NULL, { 1 }, { 1, 1 } }, - { "orb", orb, NULL, { 1 }, { 1, 1 } }, - { "shlb", shlb, NULL, { 1 }, { 1, 1 } }, - { "shrsb", shrsb, NULL, { 1 }, { 1, 1 } }, - { "shrub", shrub, NULL, { 1 }, { 1, 1 } }, - { "signb", signb, NULL, { 1 }, { 1 } }, - { "subb", subb, NULL, { 1 }, { 1, 1 } }, - { "subssb", subssb, NULL, { 1 }, { 1, 1 } }, - { "subusb", subusb, NULL, { 1 }, { 1, 1 } }, - { "xorb", xorb, NULL, { 1 }, { 1, 1 } }, + { "absb", absb, NULL, 0, { 1 }, { 1 } }, + { "addb", addb, NULL, 0, { 1 }, { 1, 1 } }, + { "addssb", addssb, NULL, 0, { 1 }, { 1, 1 } }, + { "addusb", addusb, NULL, 0, { 1 }, { 1, 1 } }, + { "andb", andb, NULL, 0, { 1 }, { 1, 1 } }, + { "andnb", andnb, NULL, 0, { 1 }, { 1, 1 } }, + { "avgsb", avgsb, NULL, 0, { 1 }, { 1, 1 } }, + { "avgub", avgub, NULL, 0, { 1 }, { 1, 1 } }, + { "cmpeqb", cmpeqb, NULL, 0, { 1 }, { 1, 1 } }, + { "cmpgtsb", cmpgtsb, NULL, 0, { 1 }, { 1, 1 } }, + { "copyb", copyb, NULL, 0, { 1 }, { 1 } }, + { "maxsb", maxsb, NULL, 0, { 1 }, { 1, 1 } }, + { "maxub", maxub, NULL, 0, { 1 }, { 1, 1 } }, + { "minsb", minsb, NULL, 0, { 1 }, { 1, 1 } }, + { "minub", minub, NULL, 0, { 1 }, { 1, 1 } }, + { "mullb", mullb, NULL, 0, { 1 }, { 1, 1 } }, + { "mulhsb", mulhsb, NULL, 0, { 1 }, { 1, 1 } }, + { "mulhub", mulhub, NULL, 0, { 1 }, { 1, 1 } }, + { "orb", orb, NULL, 0, { 1 }, { 1, 1 } }, + { "shlb", shlb, NULL, 0, { 1 }, { 1, 1 } }, + { "shrsb", shrsb, NULL, 0, { 1 }, { 1, 1 } }, + { "shrub", shrub, NULL, 0, { 1 }, { 1, 1 } }, + { "signb", signb, NULL, 0, { 1 }, { 1 } }, + { "subb", subb, NULL, 0, { 1 }, { 1, 1 } }, + { "subssb", subssb, NULL, 0, { 1 }, { 1, 1 } }, + { "subusb", subusb, 
NULL, 0, { 1 }, { 1, 1 } }, + { "xorb", xorb, NULL, 0, { 1 }, { 1, 1 } }, /* word ops */ - { "absw", absw, NULL, { 2 }, { 2 } }, - { "addw", addw, NULL, { 2 }, { 2, 2 } }, - { "addssw", addssw, NULL, { 2 }, { 2, 2 } }, - { "addusw", addusw, NULL, { 2 }, { 2, 2 } }, - { "andw", andw, NULL, { 2 }, { 2, 2 } }, - { "andnw", andnw, NULL, { 2 }, { 2, 2 } }, - { "avgsw", avgsw, NULL, { 2 }, { 2, 2 } }, - { "avguw", avguw, NULL, { 2 }, { 2, 2 } }, - { "cmpeqw", cmpeqw, NULL, { 2 }, { 2, 2 } }, - { "cmpgtsw", cmpgtsw, NULL, { 2 }, { 2, 2 } }, - { "copyw", copyw, NULL, { 2 }, { 2 } }, - { "maxsw", maxsw, NULL, { 2 }, { 2, 2 } }, - { "maxuw", maxuw, NULL, { 2 }, { 2, 2 } }, - { "minsw", minsw, NULL, { 2 }, { 2, 2 } }, - { "minuw", minuw, NULL, { 2 }, { 2, 2 } }, - { "mullw", mullw, NULL, { 2 }, { 2, 2 } }, - { "mulhsw", mulhsw, NULL, { 2 }, { 2, 2 } }, - { "mulhuw", mulhuw, NULL, { 2 }, { 2, 2 } }, - { "orw", orw, NULL, { 2 }, { 2, 2 } }, - { "shlw", shlw, NULL, { 2 }, { 2, 2 } }, - { "shrsw", shrsw, NULL, { 2 }, { 2, 2 } }, - { "shruw", shruw, NULL, { 2 }, { 2, 2 } }, - { "signw", signw, NULL, { 2 }, { 2 } }, - { "subw", subw, NULL, { 2 }, { 2, 2 } }, - { "subssw", subssw, NULL, { 2 }, { 2, 2 } }, - { "subusw", subusw, NULL, { 2 }, { 2, 2 } }, - { "xorw", xorw, NULL, { 2 }, { 2, 2 } }, + { "absw", absw, NULL, 0, { 2 }, { 2 } }, + { "addw", addw, NULL, 0, { 2 }, { 2, 2 } }, + { "addssw", addssw, NULL, 0, { 2 }, { 2, 2 } }, + { "addusw", addusw, NULL, 0, { 2 }, { 2, 2 } }, + { "andw", andw, NULL, 0, { 2 }, { 2, 2 } }, + { "andnw", andnw, NULL, 0, { 2 }, { 2, 2 } }, + { "avgsw", avgsw, NULL, 0, { 2 }, { 2, 2 } }, + { "avguw", avguw, NULL, 0, { 2 }, { 2, 2 } }, + { "cmpeqw", cmpeqw, NULL, 0, { 2 }, { 2, 2 } }, + { "cmpgtsw", cmpgtsw, NULL, 0, { 2 }, { 2, 2 } }, + { "copyw", copyw, NULL, 0, { 2 }, { 2 } }, + { "maxsw", maxsw, NULL, 0, { 2 }, { 2, 2 } }, + { "maxuw", maxuw, NULL, 0, { 2 }, { 2, 2 } }, + { "minsw", minsw, NULL, 0, { 2 }, { 2, 2 } }, + { "minuw", minuw, NULL, 0, { 2 
}, { 2, 2 } }, + { "mullw", mullw, NULL, 0, { 2 }, { 2, 2 } }, + { "mulhsw", mulhsw, NULL, 0, { 2 }, { 2, 2 } }, + { "mulhuw", mulhuw, NULL, 0, { 2 }, { 2, 2 } }, + { "orw", orw, NULL, 0, { 2 }, { 2, 2 } }, + { "shlw", shlw, NULL, 0, { 2 }, { 2, 2 } }, + { "shrsw", shrsw, NULL, 0, { 2 }, { 2, 2 } }, + { "shruw", shruw, NULL, 0, { 2 }, { 2, 2 } }, + { "signw", signw, NULL, 0, { 2 }, { 2 } }, + { "subw", subw, NULL, 0, { 2 }, { 2, 2 } }, + { "subssw", subssw, NULL, 0, { 2 }, { 2, 2 } }, + { "subusw", subusw, NULL, 0, { 2 }, { 2, 2 } }, + { "xorw", xorw, NULL, 0, { 2 }, { 2, 2 } }, /* long ops */ - { "absl", absl, NULL, { 4 }, { 4 } }, - { "addl", addl, NULL, { 4 }, { 4, 4 } }, - { "addssl", addssl, NULL, { 4 }, { 4, 4 } }, - { "addusl", addusl, NULL, { 4 }, { 4, 4 } }, - { "andl", andl, NULL, { 4 }, { 4, 4 } }, - { "andnl", andnl, NULL, { 4 }, { 4, 4 } }, - { "avgsl", avgsl, NULL, { 4 }, { 4, 4 } }, - { "avgul", avgul, NULL, { 4 }, { 4, 4 } }, - { "cmpeql", cmpeql, NULL, { 4 }, { 4, 4 } }, - { "cmpgtsl", cmpgtsl, NULL, { 4 }, { 4, 4 } }, - { "copyl", copyl, NULL, { 4 }, { 4 } }, - { "maxsl", maxsl, NULL, { 4 }, { 4, 4 } }, - { "maxul", maxul, NULL, { 4 }, { 4, 4 } }, - { "minsl", minsl, NULL, { 4 }, { 4, 4 } }, - { "minul", minul, NULL, { 4 }, { 4, 4 } }, - { "mulll", mulll, NULL, { 4 }, { 4, 4 } }, - { "mulhsl", mulhsl, NULL, { 4 }, { 4, 4 } }, - { "mulhul", mulhul, NULL, { 4 }, { 4, 4 } }, - { "orl", orl, NULL, { 4 }, { 4, 4 } }, - { "shll", shll, NULL, { 4 }, { 4, 4 } }, - { "shrsl", shrsl, NULL, { 4 }, { 4, 4 } }, - { "shrul", shrul, NULL, { 4 }, { 4, 4 } }, - { "signl", signl, NULL, { 4 }, { 4 } }, - { "subl", subl, NULL, { 4 }, { 4, 4 } }, - { "subssl", subssl, NULL, { 4 }, { 4, 4 } }, - { "subusl", subusl, NULL, { 4 }, { 4, 4 } }, - { "xorl", xorl, NULL, { 4 }, { 4, 4 } }, - - { "convsbw", convsbw, NULL, { 2 }, { 1 } }, - { "convubw", convubw, NULL, { 2 }, { 1 } }, - { "convswl", convswl, NULL, { 4 }, { 2 } }, - { "convuwl", convuwl, NULL, { 4 }, { 2 } }, + { 
"absl", absl, NULL, 0, { 4 }, { 4 } }, + { "addl", addl, NULL, 0, { 4 }, { 4, 4 } }, + { "addssl", addssl, NULL, 0, { 4 }, { 4, 4 } }, + { "addusl", addusl, NULL, 0, { 4 }, { 4, 4 } }, + { "andl", andl, NULL, 0, { 4 }, { 4, 4 } }, + { "andnl", andnl, NULL, 0, { 4 }, { 4, 4 } }, + { "avgsl", avgsl, NULL, 0, { 4 }, { 4, 4 } }, + { "avgul", avgul, NULL, 0, { 4 }, { 4, 4 } }, + { "cmpeql", cmpeql, NULL, 0, { 4 }, { 4, 4 } }, + { "cmpgtsl", cmpgtsl, NULL, 0, { 4 }, { 4, 4 } }, + { "copyl", copyl, NULL, 0, { 4 }, { 4 } }, + { "maxsl", maxsl, NULL, 0, { 4 }, { 4, 4 } }, + { "maxul", maxul, NULL, 0, { 4 }, { 4, 4 } }, + { "minsl", minsl, NULL, 0, { 4 }, { 4, 4 } }, + { "minul", minul, NULL, 0, { 4 }, { 4, 4 } }, + { "mulll", mulll, NULL, 0, { 4 }, { 4, 4 } }, + { "mulhsl", mulhsl, NULL, 0, { 4 }, { 4, 4 } }, + { "mulhul", mulhul, NULL, 0, { 4 }, { 4, 4 } }, + { "orl", orl, NULL, 0, { 4 }, { 4, 4 } }, + { "shll", shll, NULL, 0, { 4 }, { 4, 4 } }, + { "shrsl", shrsl, NULL, 0, { 4 }, { 4, 4 } }, + { "shrul", shrul, NULL, 0, { 4 }, { 4, 4 } }, + { "signl", signl, NULL, 0, { 4 }, { 4 } }, + { "subl", subl, NULL, 0, { 4 }, { 4, 4 } }, + { "subssl", subssl, NULL, 0, { 4 }, { 4, 4 } }, + { "subusl", subusl, NULL, 0, { 4 }, { 4, 4 } }, + { "xorl", xorl, NULL, 0, { 4 }, { 4, 4 } }, + + { "convsbw", convsbw, NULL, 0, { 2 }, { 1 } }, + { "convubw", convubw, NULL, 0, { 2 }, { 1 } }, + { "convswl", convswl, NULL, 0, { 4 }, { 2 } }, + { "convuwl", convuwl, NULL, 0, { 4 }, { 2 } }, #ifdef ENABLE_64BIT - { "convslq", convslq, NULL, { 8 }, { 4 } }, - { "convulq", convulq, NULL, { 8 }, { 4 } }, + { "convslq", convslq, NULL, 0, { 8 }, { 4 } }, + { "convulq", convulq, NULL, 0, { 8 }, { 4 } }, #endif - { "convwb", convwb, NULL, { 1 }, { 2 } }, - { "convssswb", convssswb, NULL, { 1 }, { 2 } }, - { "convsuswb", convsuswb, NULL, { 1 }, { 2 } }, - { "convusswb", convusswb, NULL, { 1 }, { 2 } }, - { "convuuswb", convuuswb, NULL, { 1 }, { 2 } }, + { "convwb", convwb, NULL, 0, { 1 }, { 2 } }, + { 
"convssswb", convssswb, NULL, 0, { 1 }, { 2 } }, + { "convsuswb", convsuswb, NULL, 0, { 1 }, { 2 } }, + { "convusswb", convusswb, NULL, 0, { 1 }, { 2 } }, + { "convuuswb", convuuswb, NULL, 0, { 1 }, { 2 } }, - { "convlw", convlw, NULL, { 2 }, { 4 } }, - { "convssslw", convssslw, NULL, { 2 }, { 4 } }, - { "convsuslw", convsuslw, NULL, { 2 }, { 4 } }, - { "convusslw", convusslw, NULL, { 2 }, { 4 } }, - { "convuuslw", convuuslw, NULL, { 2 }, { 4 } }, + { "convlw", convlw, NULL, 0, { 2 }, { 4 } }, + { "convssslw", convssslw, NULL, 0, { 2 }, { 4 } }, + { "convsuslw", convsuslw, NULL, 0, { 2 }, { 4 } }, + { "convusslw", convusslw, NULL, 0, { 2 }, { 4 } }, + { "convuuslw", convuuslw, NULL, 0, { 2 }, { 4 } }, #ifdef ENABLE_64BIT - { "convql", convql, NULL, { 4 }, { 8 } }, - { "convssql", convssql, NULL, { 4 }, { 8 } }, - { "convusql", convusql, NULL, { 4 }, { 8 } }, + { "convql", convql, NULL, 0, { 4 }, { 8 } }, + { "convssql", convssql, NULL, 0, { 4 }, { 8 } }, + { "convusql", convusql, NULL, 0, { 4 }, { 8 } }, #endif - { "mulsbw", mulsbw, NULL, { 2 }, { 1, 1 } }, - { "mulubw", mulubw, NULL, { 2 }, { 1, 1 } }, - { "mulswl", mulswl, NULL, { 4 }, { 2, 2 } }, - { "muluwl", muluwl, NULL, { 4 }, { 2, 2 } }, + { "mulsbw", mulsbw, NULL, 0, { 2 }, { 1, 1 } }, + { "mulubw", mulubw, NULL, 0, { 2 }, { 1, 1 } }, + { "mulswl", mulswl, NULL, 0, { 4 }, { 2, 2 } }, + { "muluwl", muluwl, NULL, 0, { 4 }, { 2, 2 } }, #ifdef ENABLE_64BIT - { "mulslq", mulslq, NULL, { 8 }, { 4, 4 } }, - { "mululq", mululq, NULL, { 8 }, { 4, 4 } }, + { "mulslq", mulslq, NULL, 0, { 8 }, { 4, 4 } }, + { "mululq", mululq, NULL, 0, { 8 }, { 4, 4 } }, #endif + /* accumulators */ + { "accw", accw, NULL, ORC_STATIC_OPCODE_ACCUMULATOR, { 2 }, { 2 } }, + { "accl", accl, NULL, ORC_STATIC_OPCODE_ACCUMULATOR, { 4 }, { 4 } }, + { "accsadubl", accsadubl, NULL, ORC_STATIC_OPCODE_ACCUMULATOR, { 4 }, { 1, 1 } }, + { "" } }; diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index da2e723..60ef2bb 100644 --- 
a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -113,6 +113,94 @@ orc_compiler_sse_init (OrcCompiler *compiler) } void +sse_save_accumulators (OrcCompiler *compiler) +{ + int i; + int src; + + for(i=0;i<ORC_N_VARIABLES;i++){ + OrcVariable *var = compiler->vars + i; + + if (compiler->vars[i].name == NULL) continue; + switch (compiler->vars[i].vartype) { + case ORC_VAR_TYPE_ACCUMULATOR: + src = compiler->vars[i].alloc; + + sse_emit_660f (compiler, "pshufd $0xee,", 0x70, src, compiler->tmpreg); +#if 0 + ORC_ASM_CODE(compiler," pshufd $0xee, %%%s, %%%s\n", + x86_get_regname_sse(src), + x86_get_regname_sse(compiler->tmpreg)); + *compiler->codeptr++ = 0x66; + x86_emit_rex (compiler, 0, src, 0, compiler->tmpreg); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0x70; + x86_emit_modrm_reg (compiler, src, compiler->tmpreg); +#endif + *compiler->codeptr++ = 0xee; + + if (compiler->vars[i].size == 2) { + sse_emit_660f (compiler, "paddw", 0xfd, compiler->tmpreg, src); + } else { + sse_emit_660f (compiler, "paddd", 0xfe, compiler->tmpreg, src); + } + + sse_emit_660f (compiler, "pshufd $0x55,", 0x70, src, compiler->tmpreg); +#if 0 + ORC_ASM_CODE(compiler," pshufd $0x55, %%%s, %%%s\n", + x86_get_regname_sse(src), + x86_get_regname_sse(compiler->tmpreg)); + *compiler->codeptr++ = 0x66; + x86_emit_rex (compiler, 0, src, 0, compiler->tmpreg); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0xef; + x86_emit_modrm_reg (compiler, src, compiler->tmpreg); +#endif + *compiler->codeptr++ = 0x55; + + if (compiler->vars[i].size == 2) { + sse_emit_660f (compiler, "paddw", 0xfd, compiler->tmpreg, src); + } else { + sse_emit_660f (compiler, "paddd", 0xfe, compiler->tmpreg, src); + } + + if (compiler->vars[i].size == 2) { + sse_emit_f20f (compiler, "pshuflw $0x55,", 0x70, src, compiler->tmpreg); +#if 0 + ORC_ASM_CODE(compiler," pshuflw $0x55, %%%s, %%%s\n", + x86_get_regname_sse(src), + x86_get_regname_sse(compiler->tmpreg)); + *compiler->codeptr++ = 0x66; + x86_emit_rex (compiler, 0, src, 0, compiler->tmpreg); +
*compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0xef; + x86_emit_modrm_reg (compiler, src, compiler->tmpreg); +#endif + *compiler->codeptr++ = 0x55; + + sse_emit_660f (compiler, "paddw", 0xfd, compiler->tmpreg, src); + } + + if (compiler->vars[i].size == 2) { + x86_emit_mov_sse_reg (compiler, src, X86_ECX); + x86_emit_mov_reg_memoffset (compiler, 2, X86_ECX, + (int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[i-ORC_VAR_A1]), + x86_exec_ptr); + } else { + x86_emit_mov_sse_memoffset (compiler, 4, src, + (int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[i-ORC_VAR_A1]), + x86_exec_ptr, + var->is_aligned, var->is_uncached); + } + + break; + default: + break; + } + } +} + +void sse_load_constants (OrcCompiler *compiler) { int i; @@ -154,7 +242,18 @@ sse_load_constants (OrcCompiler *compiler) ORC_PROGRAM_ERROR(compiler,"unimplemented"); } break; + case ORC_VAR_TYPE_ACCUMULATOR: + ORC_ASM_CODE(compiler," pxor %%%s, %%%s\n", + x86_get_regname_sse(compiler->vars[i].alloc), + x86_get_regname_sse(compiler->vars[i].alloc)); + *compiler->codeptr++ = 0x66; + x86_emit_rex (compiler, 0, compiler->vars[i].alloc, 0, compiler->vars[i].alloc); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0xef; + x86_emit_modrm_reg (compiler, compiler->vars[i].alloc, compiler->vars[i].alloc); + break; default: + ORC_PROGRAM_ERROR(compiler,"bad vartype"); break; } } @@ -249,6 +348,17 @@ sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) } static int +get_align_var (OrcCompiler *compiler) +{ + if (compiler->vars[ORC_VAR_D1].size) return ORC_VAR_D1; + if (compiler->vars[ORC_VAR_S1].size) return ORC_VAR_S1; + + ORC_PROGRAM_ERROR(compiler, "could not find alignment variable"); + + return -1; +} + +static int get_shift (int size) { switch (size) { @@ -263,26 +373,27 @@ get_shift (int size) } return -1; } + void orc_compiler_sse_assemble (OrcCompiler *compiler) { - int dest_var; - int dest_shift; + int align_var; + int align_shift; - dest_var = orc_compiler_get_dest (compiler); - 
dest_shift = get_shift (compiler->vars[dest_var].size); + align_var = get_align_var (compiler); + align_shift = get_shift (compiler->vars[align_var].size); - compiler->vars[dest_var].is_aligned = FALSE; + compiler->vars[align_var].is_aligned = FALSE; x86_emit_prologue (compiler); if (compiler->loop_shift > 0) { x86_emit_mov_imm_reg (compiler, 4, 16, X86_EAX); x86_emit_sub_memoffset_reg (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[dest_var]), + (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[align_var]), x86_exec_ptr, X86_EAX); x86_emit_and_imm_reg (compiler, 4, 15, X86_EAX); - x86_emit_sar_imm_reg (compiler, 4, dest_shift, X86_EAX); + x86_emit_sar_imm_reg (compiler, 4, align_shift, X86_EAX); x86_emit_cmp_reg_memoffset (compiler, 4, X86_EAX, (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr); @@ -347,7 +458,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) x86_emit_jne (compiler, 0); compiler->loop_shift = save_loop_shift; - compiler->vars[dest_var].is_aligned = TRUE; + compiler->vars[align_var].is_aligned = TRUE; } x86_emit_label (compiler, 1); @@ -368,7 +479,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) if (compiler->loop_shift > 0) { int save_loop_shift; - compiler->vars[dest_var].is_aligned = FALSE; + compiler->vars[align_var].is_aligned = FALSE; x86_emit_cmp_imm_memoffset (compiler, 4, 0, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), x86_exec_ptr); x86_emit_je (compiler, 5); @@ -388,6 +499,8 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) compiler->loop_shift = save_loop_shift; } + sse_save_accumulators (compiler); + x86_emit_epilogue (compiler); x86_do_fixups (compiler); @@ -444,7 +557,8 @@ sse_emit_loop (OrcCompiler *compiler) rule = insn->rule; if (rule && rule->emit) { - if (compiler->vars[insn->dest_args[0]].alloc != + if (!(insn->opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) && + compiler->vars[insn->dest_args[0]].alloc != compiler->vars[insn->src_args[0]].alloc) { x86_emit_mov_sse_reg_reg (compiler, 
compiler->vars[insn->src_args[0]].alloc, diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 2156639..2e9cddf 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -51,6 +51,33 @@ orc_program_new_ds (int size1, int size2) return p; } +OrcProgram * +orc_program_new_ass (int size1, int size2, int size3) +{ + OrcProgram *p; + + p = orc_program_new (); + + orc_program_add_accumulator (p, size1, "a1"); + orc_program_add_source (p, size2, "s1"); + orc_program_add_source (p, size3, "s2"); + + return p; +} + +OrcProgram * +orc_program_new_as (int size1, int size2) +{ + OrcProgram *p; + + p = orc_program_new (); + + orc_program_add_accumulator (p, size1, "a1"); + orc_program_add_source (p, size2, "s1"); + + return p; +} + void orc_program_free (OrcProgram *program) { @@ -159,6 +186,19 @@ orc_program_add_parameter (OrcProgram *program, int size, const char *name) return i; } +int +orc_program_add_accumulator (OrcProgram *program, int size, const char *name) +{ + int i = ORC_VAR_A1 + program->n_accum_vars; + + program->vars[i].vartype = ORC_VAR_TYPE_ACCUMULATOR; + program->vars[i].size = size; + program->vars[i].name = strdup(name); + program->n_accum_vars++; /* the slot index above is derived from n_accum_vars, so advance that counter */ + + return i; +} + void orc_program_append_ds (OrcProgram *program, const char *name, int arg0, int arg1) diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 33b45c8..6c51d4d 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -78,7 +78,8 @@ typedef enum { ORC_VAR_TYPE_SRC, ORC_VAR_TYPE_DEST, ORC_VAR_TYPE_CONST, - ORC_VAR_TYPE_PARAM + ORC_VAR_TYPE_PARAM, + ORC_VAR_TYPE_ACCUMULATOR } OrcVarType; enum { @@ -94,6 +95,10 @@ enum { ORC_VAR_S6, ORC_VAR_S7, ORC_VAR_S8, + ORC_VAR_A1, + ORC_VAR_A2, + ORC_VAR_A3, + ORC_VAR_A4, ORC_VAR_C1, ORC_VAR_C2, ORC_VAR_C3, @@ -163,10 +168,13 @@ struct _OrcOpcodeSet { OrcStaticOpcode *opcodes; }; +#define ORC_STATIC_OPCODE_ACCUMULATOR 1 + struct _OrcStaticOpcode { char name[16]; OrcOpcodeEmulateFunc emulate; void *emulate_user; + unsigned int flags; int
dest_size[ORC_STATIC_OPCODE_N_DEST]; int src_size[ORC_STATIC_OPCODE_N_SRC]; }; @@ -196,6 +204,7 @@ struct _OrcProgram { int n_param_vars; int n_const_vars; int n_temp_vars; + int n_accum_vars; char *name; char *asm_code; @@ -252,6 +261,7 @@ struct _OrcExecutor { void *arrays[ORC_N_VARIABLES]; int params[ORC_N_VARIABLES]; + int accumulators[4]; }; struct _OrcTarget { @@ -272,6 +282,8 @@ void orc_init (void); OrcProgram * orc_program_new (void); OrcProgram * orc_program_new_ds (int size1, int size2); OrcProgram * orc_program_new_dss (int size1, int size2, int size3); +OrcProgram * orc_program_new_as (int size1, int size2); +OrcProgram * orc_program_new_ass (int size1, int size2, int size3); OrcStaticOpcode * orc_opcode_find_by_name (const char *name); void orc_opcode_init (void); @@ -305,6 +317,7 @@ int orc_program_add_source (OrcProgram *program, int size, const char *name); int orc_program_add_destination (OrcProgram *program, int size, const char *name); int orc_program_add_constant (OrcProgram *program, int size, int value, const char *name); int orc_program_add_parameter (OrcProgram *program, int size, const char *name); +int orc_program_add_accumulator (OrcProgram *program, int size, const char *name); void orc_program_x86_reset_alloc (OrcProgram *program); void orc_program_powerpc_reset_alloc (OrcProgram *program); @@ -316,6 +329,8 @@ void orc_executor_set_array (OrcExecutor *ex, int var, void *ptr); void orc_executor_set_array_str (OrcExecutor *ex, const char *name, void *ptr); void orc_executor_set_parameter (OrcExecutor *ex, int var, int value); void orc_executor_set_param_str (OrcExecutor *ex, const char *name, int value); +int orc_executor_get_accumulator (OrcExecutor *ex, int var, int value); +int orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name, int value); void orc_executor_set_n (OrcExecutor *ex, int n); void orc_executor_emulate (OrcExecutor *ex); void orc_executor_run (OrcExecutor *ex); diff --git a/orc/orcrules-sse.c 
b/orc/orcrules-sse.c index 1a46f26..e1b4d75 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -17,7 +17,21 @@ int ssse3 = TRUE; int sse41 = FALSE; -static void +void +sse_emit_f20f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest) +{ + ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(src), + x86_get_regname_sse(dest)); + *p->codeptr++ = 0xf2; + x86_emit_rex (p, 0, src, 0, dest); + *p->codeptr++ = 0x0f; + *p->codeptr++ = code; + x86_emit_modrm_reg (p, src, dest); +} + +void sse_emit_660f (OrcCompiler *p, const char *insn_name, int code, int src, int dest) { @@ -31,7 +45,7 @@ sse_emit_660f (OrcCompiler *p, const char *insn_name, int code, x86_emit_modrm_reg (p, src, dest); } -static void +void sse_emit_660f38 (OrcCompiler *p, const char *insn_name, int code, int src, int dest) { @@ -294,6 +308,68 @@ BINARY(xorl,"pxor",0xef) static void +sse_rule_accw (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + + sse_emit_660f (p, "paddw", 0xfd, src, dest); +} + +static void +sse_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + + sse_emit_660f (p, "paddd", 0xfe, src, dest); +} + +static void +sse_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src1 = p->vars[insn->src_args[0]].alloc; + int src2 = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + + sse_emit_660f (p, "movdqa", 0x6f, src1, p->tmpreg); + sse_emit_660f (p, "psadbw", 0xf6, src2, p->tmpreg); + sse_emit_660f (p, "paddd", 0xfe, p->tmpreg, dest); + +#if 0 + ORC_ASM_CODE(p," movd %%%s, %%ecx\n", x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x7e; + x86_emit_modrm_reg (p, X86_ECX, dest); + + x86_emit_add_reg_memoffset (p, 4, X86_ECX, + 
(int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[0]), + x86_exec_ptr); + + if (p->loop_shift > 3) { + ORC_ASM_CODE(p," psrldq $8, %%%s\n", x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x73; + x86_emit_modrm_reg (p, dest, 3); + *p->codeptr++ = 0x08; + + ORC_ASM_CODE(p," movd %%%s, %%ecx\n", x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x7e; + x86_emit_modrm_reg (p, X86_ECX, dest); + + x86_emit_add_reg_memoffset (p, 4, X86_ECX, + (int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[0]), + x86_exec_ptr); + + } +#endif +} + +static void sse_rule_signX (OrcCompiler *p, void *user, OrcInstruction *insn) { int src = p->vars[insn->src_args[0]].alloc; @@ -812,6 +888,10 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "mulswl", sse_rule_mulswl, NULL); + orc_rule_register (rule_set, "accw", sse_rule_accw, NULL); + orc_rule_register (rule_set, "accl", sse_rule_accl, NULL); + orc_rule_register (rule_set, "accsadubl", sse_rule_accsadubl, NULL); + /* slow rules */ orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL); orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL); diff --git a/orc/x86.c b/orc/x86.c index 6a365d7..976a8c7 100644 --- a/orc/x86.c +++ b/orc/x86.c @@ -687,6 +687,30 @@ x86_emit_add_imm_memoffset (OrcCompiler *compiler, int size, int value, } void +x86_emit_add_reg_memoffset (OrcCompiler *compiler, int size, int reg1, + int offset, int reg) +{ + if (size == 2) { + ORC_ASM_CODE(compiler," addw %%%s, %d(%%%s)\n", + x86_get_regname_ptr(reg1), offset, + x86_get_regname_ptr(reg)); + *compiler->codeptr++ = 0x66; + } else if (size == 4) { + ORC_ASM_CODE(compiler," addl %%%s, %d(%%%s)\n", + x86_get_regname_ptr(reg1), offset, + x86_get_regname_ptr(reg)); + } else { + ORC_ASM_CODE(compiler," add %%%s, %d(%%%s)\n", + x86_get_regname_ptr(reg1), offset, + x86_get_regname_ptr(reg)); + } + + x86_emit_rex(compiler, size, 0, 0, 
reg); + *compiler->codeptr++ = 0x01; + x86_emit_modrm_memoffset (compiler, reg1, offset, reg); +} + +void x86_emit_add_imm_reg (OrcCompiler *compiler, int size, int value, int reg) { if (size == 2) { diff --git a/orc/x86.h b/orc/x86.h index 0dc2329..2090911 100644 --- a/orc/x86.h +++ b/orc/x86.h @@ -32,6 +32,7 @@ void x86_emit_test_reg_reg (OrcCompiler *compiler, int size, int reg1, int reg2) void x86_emit_sar_imm_reg (OrcCompiler *compiler, int size, int value, int reg); void x86_emit_dec_memoffset (OrcCompiler *compiler, int size, int offset, int reg); void x86_emit_add_imm_memoffset (OrcCompiler *compiler, int size, int value, int offset, int reg); +void x86_emit_add_reg_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, int reg); void x86_emit_and_imm_memoffset (OrcCompiler *compiler, int size, int value, int offset, int reg); void x86_emit_add_imm_reg (OrcCompiler *compiler, int size, int value, int reg); void x86_emit_and_imm_reg (OrcCompiler *compiler, int size, int value, int reg); @@ -68,6 +69,13 @@ void sse_emit_loadpb (OrcCompiler *p, int reg, int value); void sse_emit_loadpw (OrcCompiler *p, int reg, int value); void sse_emit_loadpl (OrcCompiler *p, int reg, int value); +void sse_emit_660f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest); +void sse_emit_f20f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest); +void sse_emit_660f38 (OrcCompiler *p, const char *insn_name, int code, + int src, int dest); + enum { X86_EAX = ORC_GP_REG_BASE, X86_ECX, diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index ade3650..e487b82 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -1,7 +1,7 @@ -TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile +TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile test_accsadubl -orcbin_PROGRAMS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile +orcbin_PROGRAMS = test1 test2 
test3 test4 test5 test_local_opcode_execution test_compile test_accsadubl AM_CFLAGS = $(ORC_CFLAGS) LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la diff --git a/testsuite/test3.c b/testsuite/test3.c index 286f53c..c859a1d 100644 --- a/testsuite/test3.c +++ b/testsuite/test3.c @@ -41,13 +41,31 @@ test_opcode (OrcStaticOpcode *opcode) char s[40]; int ret; - p = orc_program_new_dss (opcode->dest_size[0], - opcode->src_size[0], opcode->src_size[1]); + if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) { + if (opcode->src_size[1] == 0) { + p = orc_program_new_as (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_ass (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } + } else { + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } + } sprintf(s, "test_%s", opcode->name); orc_program_set_name (p, s); + //orc_program_add_constant (p, 2, 1, "c1"); - orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) { + orc_program_append_str (p, opcode->name, "a1", "s1", "s2"); + } else { + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + } ret = orc_program_compile (p); if (!ret) { diff --git a/testsuite/test_accsadubl.c b/testsuite/test_accsadubl.c new file mode 100644 index 0000000..380dad8 --- /dev/null +++ b/testsuite/test_accsadubl.c @@ -0,0 +1,87 @@ + +#include "config.h" + +#include +#include + +#include +#include + + +int error = FALSE; + +void test_opcode (OrcStaticOpcode *opcode); + +uint8_t array1[100]; +uint8_t array2[100]; + +int orc_sad_u8 (uint8_t *s1, uint8_t *s2, int n); + +int +main (int argc, char *argv[]) +{ + int i; + int n; + int sum; + + orc_init(); + +for(n=0;n<20;n++){ + sum = 0; + for(i=0;i %d\n", i, array1[i], array2[i], + abs(array1[i] - array2[i])); + sum += 
abs(array1[i] - array2[i]); + } + + printf("sum %d %d\n", sum, orc_sad_u8 (array1, array2, n)); +} + + if (error) return 1; + return 0; +} + + +int +orc_sad_u8 (uint8_t *s1, uint8_t *s2, int n) +{ + static OrcProgram *p = NULL; + OrcExecutor *ex; + int sum; + + if (p == NULL) { + int ret; + + p = orc_program_new (); + orc_program_add_accumulator (p, 4, "a1"); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + + orc_program_append_str (p, "accsadubl", "a1", "s1", "s2"); + + ret = orc_program_compile (p); + if (!ret) { + ORC_ERROR("Orc compiler failure"); + } + + printf("%s\n", orc_program_get_asm_code (p)); + } + + ex = orc_executor_new (p); + orc_executor_set_n (ex, n); + orc_executor_set_array_str (ex, "s1", s1); + orc_executor_set_array_str (ex, "s2", s2); + + orc_executor_run (ex); + + //sum = orc_executor_get_accumulator (ex, "a1"); + sum = ex->accumulators[0]; + + orc_executor_free (ex); + + return sum; +} + + diff --git a/testsuite/test_compile.c b/testsuite/test_compile.c index 75b6386..f95aeb5 100644 --- a/testsuite/test_compile.c +++ b/testsuite/test_compile.c @@ -63,17 +63,30 @@ test_opcode (OrcStaticOpcode *opcode) OrcProgram *p; char s[40]; - if (opcode->src_size[1] == 0) { - p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) { + if (opcode->src_size[1] == 0) { + p = orc_program_new_as (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_ass (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } } else { - p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], - opcode->src_size[1]); + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } } sprintf(s, "test_%s", opcode->name); orc_program_set_name (p, s); - orc_program_append_str (p, opcode->name, 
"d1", "s1", "s2"); + if (opcode->flags & ORC_STATIC_OPCODE_ACCUMULATOR) { + orc_program_append_str (p, opcode->name, "a1", "s1", "s2"); + } else { + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + } orc_test_gcc_compile (p); -- 2.7.4