From a60bfcb45b057437de447b19df0d8b5dc8fd2a34 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sat, 28 Mar 2009 18:23:13 -0700 Subject: [PATCH] lots of hacking --- orc-test/orctest.c | 158 ++++++++++++++++++ orc-test/orctest.h | 3 + orc/orcexecutor.c | 27 ++- orc/orcopcodes.c | 115 ++++++------- orc/orcprogram-sse.c | 41 ++++- orc/orcprogram.c | 14 ++ orc/orcprogram.h | 4 +- orc/orcrules-sse.c | 284 ++++++++++++++++++++++++++++++-- orc/orcutils.h | 4 +- testsuite/Makefile.am | 6 +- testsuite/test5.c | 19 ++- testsuite/test_local_opcode_execution.c | 65 ++++++++ tools/orcc.c | 8 + 13 files changed, 644 insertions(+), 104 deletions(-) create mode 100644 testsuite/test_local_opcode_execution.c diff --git a/orc-test/orctest.c b/orc-test/orctest.c index 05b935d..6eea796 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -3,6 +3,7 @@ #include #include +#include void @@ -10,6 +11,7 @@ orc_test_init (void) { orc_init (); + setvbuf (stdout, NULL, _IONBF, 0); } @@ -75,3 +77,159 @@ orc_test_gcc_compile (OrcProgram *p) } +void +orc_test_random_bits (void *data, int n_bytes) +{ + uint8_t *d = data; + int i; + for(i=0;in_vars;i++){ + if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) { + uint8_t *data; + data = malloc(n*program->vars[i].size); + orc_test_random_bits (data, n*program->vars[i].size); + orc_executor_set_array (ex, i, data); + } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) { + dest_index = i; + } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) { + orc_executor_set_parameter (ex, i, 2); + } + } + if (dest_index == -1) { + return FALSE; + } + + dest_exec = malloc(n*program->vars[dest_index].size); + memset (dest_exec, 0xa5, n*program->vars[dest_index].size); + dest_emul = malloc(n*program->vars[dest_index].size); + memset (dest_emul, 0xa5, n*program->vars[dest_index].size); + + orc_executor_set_array (ex, dest_index, dest_exec); + orc_executor_run (ex); + + orc_executor_set_array (ex, dest_index, dest_emul); + orc_executor_emulate (ex); + + if (memcmp (dest_exec, dest_emul, n*program->vars[dest_index].size) != 0) { + for(i=0;in_vars;j++){ + if (program->vars[j].vartype == ORC_VAR_TYPE_SRC && + program->vars[j].size > 0) { + print_array_val (ex->arrays[j], program->vars[j].size, i); + } + } + + printf(" ->"); + a = print_array_val (dest_emul, program->vars[dest_index].size, i); + b = print_array_val (dest_exec, program->vars[dest_index].size, i); + + if (a != b) { + printf(" *"); + } + + printf("\n"); + } +#if 0 + switch (program->vars[dest_index].size) { + case 1: + { + uint8_t *a = dest_emul; + uint8_t *b = dest_exec; + for(i=0;in;i++){ for(j=0;jn_insns;j++){ insn = program->insns + j; @@ -96,39 +98,48 @@ orc_executor_emulate (OrcExecutor *ex) void *ptr = ex->arrays[insn->src_args[k]] + program->vars[insn->src_args[k]].size*i; + if (opcode->src_size[k] == 0) continue; + switch (program->vars[insn->src_args[k]].size) { case 1: - opcode_ex.values[k] = *(int8_t *)ptr; + opcode_ex.src_values[k] = *(int8_t *)ptr; break; case 2: - opcode_ex.values[k] = *(int16_t *)ptr; + opcode_ex.src_values[k] = *(int16_t *)ptr; break; case 4: - opcode_ex.values[k] = *(int32_t *)ptr; + opcode_ex.src_values[k] = *(int32_t *)ptr; break; default: - ORC_ERROR("ack"); + ORC_ERROR("unhandled size %d", program->vars[insn->src_args[k]].size); } } opcode->emulate (&opcode_ex, opcode->emulate_user); +#if 0 + ORC_ERROR("emulate %s: %d %d -> %d", + opcode->name, opcode_ex.src_values[0], opcode_ex.src_values[1], + opcode_ex.dest_values[0]); +#endif for(k=0;karrays[insn->dest_args[k]] + program->vars[insn->dest_args[k]].size*i; + if (opcode->dest_size[k] == 0) continue; + switch (program->vars[insn->dest_args[k]].size) { case 1: - *(int8_t *)ptr = opcode_ex.values[k]; + *(int8_t *)ptr = opcode_ex.dest_values[k]; break; case 2: - *(int16_t *)ptr = opcode_ex.values[k]; + *(int16_t *)ptr = opcode_ex.dest_values[k]; break; case 4: - *(int32_t *)ptr = opcode_ex.values[k]; + *(int32_t *)ptr = opcode_ex.dest_values[k]; break; default: - ORC_ERROR("ack"); + ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size); } } } diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index cc019ee..33435b1 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -30,12 +30,12 @@ static OrcTarget *default_target; #define ORC_UL_MAX 4294967295U #define ORC_UL_MIN 0 -#define ORC_CLAMP_SB(x) CLAMP(x,ORC_SB_MIN,ORC_SB_MAX) -#define ORC_CLAMP_UB(x) CLAMP(x,ORC_UB_MIN,ORC_UB_MAX) -#define ORC_CLAMP_SW(x) CLAMP(x,ORC_SW_MIN,ORC_SW_MAX) -#define ORC_CLAMP_UW(x) CLAMP(x,ORC_UW_MIN,ORC_UW_MAX) -#define ORC_CLAMP_SL(x) CLAMP(x,ORC_SL_MIN,ORC_SL_MAX) -#define ORC_CLAMP_UL(x) CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) +#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX) +#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX) +#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX) +#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX) +#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX) +#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) void @@ -212,111 +212,102 @@ orc_opcode_find_by_name (const char *name) static void convsbw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (int8_t)(ex->values[1]); + ex->dest_values[0] = (int8_t)(ex->src_values[0]); } static void convubw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (uint8_t)(ex->values[1]); + ex->dest_values[0] = (uint8_t)(ex->src_values[0]); } static void convswl (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (int8_t)(ex->values[1]); + ex->dest_values[0] = (int16_t)(ex->src_values[0]); } static void convuwl (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (uint8_t)(ex->values[1]); + ex->dest_values[0] = (uint16_t)(ex->src_values[0]); } static void convwb (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (int16_t)(ex->values[1]); + ex->dest_values[0] = (int16_t)(ex->src_values[0]); } static void convssswb (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_SB((int16_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_SB((int16_t)(ex->src_values[0])); } static void convsuswb (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_UB((int16_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_UB((int16_t)(ex->src_values[0])); } static void convusswb (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_SB((uint16_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_SB((uint16_t)(ex->src_values[0])); } static void convuuswb (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_UB((uint16_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_UB((uint16_t)(ex->src_values[0])); } static void convlw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = (int32_t)(ex->values[1]); + ex->dest_values[0] = (int32_t)(ex->src_values[0]); } static void convssslw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_SW((int32_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_SW((int32_t)(ex->src_values[0])); } static void convsuslw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_UW((int32_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_UW((int32_t)(ex->src_values[0])); } static void convusslw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_SW((uint32_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_SW((uint32_t)(ex->src_values[0])); } static void convuuslw (OrcOpcodeExecutor *ex, void *user) { - ex->values[0] = ORC_CLAMP_UW((uint32_t)(ex->values[1])); + ex->dest_values[0] = ORC_CLAMP_UW((uint32_t)(ex->src_values[0])); } #define UNARY(name,type,code) \ static void \ name (OrcOpcodeExecutor *ex, void *user) \ { \ - int a = ex->values[1]; \ - ex->values[0] = ( type )( code ); \ + int a = ex->src_values[0]; \ + ex->dest_values[0] = ( type )( code ); \ } #define BINARY(name,type,code) \ static void \ name (OrcOpcodeExecutor *ex, void *user) \ { \ - int a = ex->values[1]; \ - int b = ex->values[2]; \ - ex->values[0] = ( type )( code ); \ -} - -#define BINARY_U(name,type,code) \ -static void \ -name (OrcOpcodeExecutor *ex, void *user) \ -{ \ - unsigned int a = ex->values[1]; \ - unsigned int b = ex->values[2]; \ - ex->values[0] = ( type )( code ); \ + int a = (type) ex->src_values[0]; \ + int b = (type) ex->src_values[1]; \ + ex->dest_values[0] = ( type )( code ); \ } #define UNARY_SB(name,code) UNARY(name, int8_t, code) @@ -333,10 +324,10 @@ name (OrcOpcodeExecutor *ex, void *user) \ UNARY_SB(absb, (a<0)?-a:a) BINARY_SB(addb, a + b) -BINARY_SB(addssb, CLAMP(ORC_SB_MIN,ORC_SB_MAX,a + b)) -BINARY_UB(addusb, CLAMP(0,ORC_UB_MAX,a + b)) +BINARY_SB(addssb, ORC_CLAMP_SB(a + b)) +BINARY_UB(addusb, ORC_CLAMP_UB(a + b)) BINARY_SB(andb, a & b) -BINARY_SB(andnb, a & (~b)) +BINARY_SB(andnb, (~a) & b) BINARY_SB(avgsb, (a + b + 1)>>1) BINARY_UB(avgub, (a + b + 1)>>1) BINARY_SB(cmpeqb, (a == b) ? (~0) : 0) @@ -352,19 +343,19 @@ BINARY_UB(mulhub, (a * b) >> 8) BINARY_SB(orb, a | b) BINARY_SB(shlb, a << b) BINARY_SB(shrsb, a >> b) -BINARY_UB(shrub, ((uint8_t)a) >> b) -UNARY_SB(signb, CLAMP(-1,1,a)) +BINARY_UB(shrub, (a) >> b) +UNARY_SB(signb, ORC_CLAMP(a,-1,1)) BINARY_SB(subb, a - b) -BINARY_SB(subssb, CLAMP(ORC_SB_MIN,ORC_SB_MAX,a - b)) -BINARY_UB(subusb, CLAMP(0,ORC_UB_MAX,(uint8_t)a - (uint8_t)b)) +BINARY_SB(subssb, ORC_CLAMP_SB(a - b)) +BINARY_UB(subusb, ORC_CLAMP_UB(a - b)) BINARY_SB(xorb, a ^ b) UNARY_SW(absw, (a<0)?-a:a) BINARY_SW(addw, a + b) -BINARY_SW(addssw, CLAMP(ORC_SW_MIN,ORC_SW_MAX,a + b)) -BINARY_UW(addusw, CLAMP(0,ORC_UW_MAX,a + b)) +BINARY_SW(addssw, ORC_CLAMP_SW(a + b)) +BINARY_UW(addusw, ORC_CLAMP_UW(a + b)) BINARY_SW(andw, a & b) -BINARY_SW(andnw, a & (~b)) +BINARY_SW(andnw, (~a) & b) BINARY_SW(avgsw, (a + b + 1)>>1) BINARY_UW(avguw, (a + b + 1)>>1) BINARY_SW(cmpeqw, (a == b) ? (~0) : 0) @@ -381,27 +372,27 @@ BINARY_SW(orw, a | b) BINARY_SW(shlw, a << b) BINARY_SW(shrsw, a >> b) BINARY_UW(shruw, a >> b) -UNARY_SW(signw, CLAMP(-1,1,a)) +UNARY_SW(signw, ORC_CLAMP(a,-1,1)) BINARY_SW(subw, a - b) -BINARY_SW(subssw, CLAMP(ORC_SW_MIN,ORC_SW_MAX,a - b)) -BINARY_UW(subusw, CLAMP(0,ORC_UW_MAX,a - b)) +BINARY_SW(subssw, ORC_CLAMP_SW(a - b)) +BINARY_UW(subusw, ORC_CLAMP_UW(a - b)) BINARY_SW(xorw, a ^ b) UNARY_SL(absl, (a<0)?-a:a) BINARY_SL(addl, a + b) -BINARY_SL(addssl, CLAMP(ORC_SL_MIN,ORC_SL_MAX,(int64_t)a + (int64_t)b)) -BINARY_UL(addusl, CLAMP(0,ORC_UL_MAX,(uint64_t)a + (uint64_t)b)) +BINARY_SL(addssl, ORC_CLAMP_SL((int64_t)a + (int64_t)b)) +BINARY_UL(addusl, ORC_CLAMP_UL((uint64_t)a + (uint64_t)b)) BINARY_SL(andl, a & b) -BINARY_SL(andnl, a & (~b)) +BINARY_SL(andnl, (~a) & b) BINARY_SL(avgsl, (a + b + 1)>>1) BINARY_UL(avgul, (a + b + 1)>>1) BINARY_SL(cmpeql, (a == b) ? (~0) : 0) BINARY_SL(cmpgtsl, (a > b) ? (~0) : 0) UNARY_SL(copyl, a) BINARY_SL(maxsl, (a > b) ? a : b) -BINARY_UL(maxul, (a > b) ? a : b) +BINARY_UL(maxul, ((uint32_t)a > (uint32_t)b) ? a : b) BINARY_SL(minsl, (a < b) ? a : b) -BINARY_UL(minul, (a < b) ? a : b) +BINARY_UL(minul, ((uint32_t)a < (uint32_t)b) ? a : b) BINARY_SL(mulll, (a * b) & 0xffffffff) BINARY_SL(mulhsl, ((int64_t)a * (int64_t)b) >> 32) BINARY_UL(mulhul, ((uint64_t)a * (uint64_t)b) >> 32) @@ -409,10 +400,10 @@ BINARY_SL(orl, a | b) BINARY_SL(shll, a << b) BINARY_SL(shrsl, a >> b) BINARY_UL(shrul, a >> b) -UNARY_SL(signl, CLAMP(-1,1,a)) +UNARY_SL(signl, ORC_CLAMP(a,-1,1)) BINARY_SL(subl, a - b) -BINARY_SL(subssl, CLAMP(ORC_SL_MIN,ORC_SL_MAX,(int64_t)a - (int64_t)b)) -BINARY_UL(subusl, CLAMP(0,ORC_UL_MAX,(uint64_t)a - (uint64_t)b)) +BINARY_SL(subssl, ORC_CLAMP_SL((int64_t)a - (int64_t)b)) +BINARY_UL(subusl, ORC_CLAMP_UL((uint64_t)a - (uint64_t)b)) BINARY_SL(xorl, a ^ b) @@ -420,8 +411,8 @@ BINARY_SL(xorl, a ^ b) static void \ name (OrcOpcodeExecutor *ex, void *user) \ { \ - ex->values[0] = ((type2)(type1)ex->values[1]) * \ - ((type2)(type1)ex->values[2]); \ + ex->dest_values[0] = ((type2)(type1)ex->src_values[0]) * \ + ((type2)(type1)ex->src_values[1]); \ } MUL(mulsbw, int8_t, int16_t) @@ -537,11 +528,11 @@ static OrcStaticOpcode opcodes[] = { { "convusswb", convusswb, NULL, { 1 }, { 2 } }, { "convuuswb", convuuswb, NULL, { 1 }, { 2 } }, - { "convlw", convlw, NULL, { 1 }, { 2 } }, - { "convssslw", convssslw, NULL, { 1 }, { 2 } }, - { "convsuslw", convsuslw, NULL, { 1 }, { 2 } }, - { "convusslw", convusslw, NULL, { 1 }, { 2 } }, - { "convuuslw", convuuslw, NULL, { 1 }, { 2 } }, + { "convlw", convlw, NULL, { 2 }, { 4 } }, + { "convssslw", convssslw, NULL, { 2 }, { 4 } }, + { "convsuslw", convsuslw, NULL, { 2 }, { 4 } }, + { "convusslw", convusslw, NULL, { 2 }, { 4 } }, + { "convuuslw", convuuslw, NULL, { 2 }, { 4 } }, #ifdef ENABLE_64BIT { "convql", convql, NULL, { 4 }, { 8 } }, diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 8efb1dd..9f06491 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -93,7 +93,21 @@ orc_compiler_sse_init (OrcCompiler *compiler) compiler->tmpreg = X86_XMM0; compiler->valid_regs[compiler->tmpreg] = 0; - compiler->loop_shift = 3; + switch (orc_program_get_max_var_size (compiler->program)) { + case 1: + compiler->loop_shift = 4; + break; + case 2: + compiler->loop_shift = 3; + break; + case 4: + compiler->loop_shift = 2; + break; + default: + ORC_ERROR("unhandled max var size %d", + orc_program_get_max_var_size (compiler->program)); + break; + } } void @@ -211,24 +225,41 @@ sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) } } +int +get_shift (int size) +{ + switch (size) { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + default: + ORC_ERROR("bad size %d", size); + } + return -1; +} void orc_compiler_sse_assemble (OrcCompiler *compiler) { - int dest_var = orc_compiler_get_dest (compiler); + int dest_var; + int dest_shift; + + dest_var = orc_compiler_get_dest (compiler); + dest_shift = get_shift (compiler->vars[dest_var].size); compiler->vars[dest_var].is_aligned = FALSE; x86_emit_prologue (compiler); if (compiler->loop_shift > 0) { - x86_emit_mov_imm_reg (compiler, 4, 16, X86_EAX); x86_emit_sub_memoffset_reg (compiler, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[dest_var]), x86_exec_ptr, X86_EAX); x86_emit_and_imm_reg (compiler, 4, 15, X86_EAX); - /* FIXME size shift */ - x86_emit_sar_imm_reg (compiler, 4, 1, X86_EAX); + x86_emit_sar_imm_reg (compiler, 4, dest_shift, X86_EAX); x86_emit_cmp_reg_memoffset (compiler, 4, X86_EAX, (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr); diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 4321be5..2a0fd5f 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -268,3 +268,17 @@ orc_program_get_asm_code (OrcProgram *program) return program->asm_code; } +int +orc_program_get_max_var_size (OrcProgram *program) +{ + int i; + int max; + + max = 0; + for(i=0;in_vars;i++){ + max = MAX(max, program->vars[i].size); + } + + return max; +} + diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 379e35b..4b3883b 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -212,7 +212,8 @@ struct _OrcCompiler { }; struct _OrcOpcodeExecutor { - int values[ORC_N_VARIABLES]; + int src_values[ORC_STATIC_OPCODE_N_SRC]; + int dest_values[ORC_STATIC_OPCODE_N_DEST]; }; struct _OrcExecutor { @@ -318,6 +319,7 @@ void orc_compiler_append_code (OrcCompiler *p, const char *fmt, ...); void orc_target_register (OrcTarget *target); OrcTarget *orc_target_get_by_name (const char *target_name); +int orc_program_get_max_var_size (OrcProgram *program); #endif diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 7f5361d..0315c40 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -46,6 +46,26 @@ sse_emit_660f38 (OrcCompiler *p, const char *insn_name, int code, x86_emit_modrm_reg (p, src, dest); } +static void +sse_emit_load_imm (OrcCompiler *p, int reg, int value) +{ + x86_emit_mov_imm_reg (p, 4, value, X86_ECX); + + ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_reg (p, X86_ECX, reg); + + ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, reg, reg); + *p->codeptr++ = 0x00; +} + /* sse rules */ void @@ -66,12 +86,15 @@ sse_emit_loadiw (OrcCompiler *p, int reg, int value) x86_emit_modrm_reg (p, reg, reg); } else if (value == 1) { + sse_emit_660f (p, "pcmpeqw", 0x75, reg, reg); +#if 0 ORC_ASM_CODE(p," pcmpeqw %%%s, %%%s\n", x86_get_regname_sse(reg), x86_get_regname_sse(reg)); *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0x75; x86_emit_modrm_reg (p, reg, reg); +#endif ORC_ASM_CODE(p," psrlw $15, %%%s\n", x86_get_regname_sse(reg)); *p->codeptr++ = 0x66; @@ -139,13 +162,13 @@ sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_emit_66_rex_0f (OrcCompiler *p, OrcInstruction *insn, int code, - const char *insn_name) + const char *insn_name, int src, int dest) { ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, - x86_get_regname_sse(p->vars[insn->src_args[1]].alloc), - x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc)); + x86_get_regname_sse(src), x86_get_regname_sse(dest)); *p->codeptr++ = 0x66; + x86_emit_rex (p, 0, src, 0, dest); *p->codeptr++ = 0x0f; if (code & 0xff00) { *p->codeptr++ = code >> 8; @@ -153,22 +176,25 @@ sse_emit_66_rex_0f (OrcCompiler *p, OrcInstruction *insn, int code, } else { *p->codeptr++ = code; } - x86_emit_modrm_reg (p, p->vars[insn->src_args[1]].alloc, - p->vars[insn->dest_args[0]].alloc); + x86_emit_modrm_reg (p, src, dest); } #define UNARY(opcode,insn_name,code) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - sse_emit_66_rex_0f (p, insn, code, insn_name); \ + sse_emit_66_rex_0f (p, insn, code, insn_name, \ + p->vars[insn->src_args[0]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ } #define BINARY(opcode,insn_name,code) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - sse_emit_66_rex_0f (p, insn, code, insn_name); \ + sse_emit_66_rex_0f (p, insn, code, insn_name, \ + p->vars[insn->src_args[1]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ } @@ -189,7 +215,7 @@ BINARY(minub,"pminub",0xda) //BINARY(mulhsb,"pmulhb",0xe5) //BINARY(mulhub,"pmulhub",0xe4) BINARY(orb,"por",0xeb) -UNARY(signb,"psignb",0x3808) +//UNARY(signb,"psignb",0x3808) BINARY(subb,"psubb",0xf8) BINARY(subssb,"psubsb",0xe8) BINARY(subusb,"psubusb",0xd8) @@ -212,7 +238,7 @@ BINARY(mullw,"pmullw",0xd5) BINARY(mulhsw,"pmulhw",0xe5) BINARY(mulhuw,"pmulhuw",0xe4) BINARY(orw,"por",0xeb) -UNARY(signw,"psignw",0x3809) +//UNARY(signw,"psignw",0x3809) BINARY(subw,"psubw",0xf9) BINARY(subssw,"psubsw",0xe9) BINARY(subusw,"psubusw",0xd9) @@ -235,13 +261,46 @@ BINARY(mulll,"pmulld",0x3840) //BINARY(mulhsl,"pmulhd",0xe5) //BINARY(mulhul,"pmulhud",0xe4) BINARY(orl,"por",0xeb) -UNARY(signl,"psignd",0x380a) +//UNARY(signl,"psignd",0x380a) BINARY(subl,"psubd",0xfa) //BINARY(subssl,"psubsd",0xe9) //BINARY(subusl,"psubusd",0xd9) BINARY(xorl,"pxor",0xef) +static void +sse_rule_signX (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int imm_vals[] = { 0x01010101, 0x00010001, 0x00000001 }; + const char * names[] = { "psignb", "psignw", "psignd" }; + int codes[] = { 0x08, 0x09, 0x0a }; + + if (src == dest) { + sse_emit_660f (p, "movdqa", 0x6f, src, p->tmpreg); + src = p->tmpreg; + } + + x86_emit_mov_imm_reg (p, 4, imm_vals[((int)user)], X86_ECX); + + ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_reg (p, X86_ECX, dest); + + ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(dest), + x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, dest, dest); + *p->codeptr++ = 0x00; + + sse_emit_660f38 (p, names[((int)user)], codes[((int)user)], src, dest); +} + #if 0 static void sse_rule_shlw (OrcCompiler *p, void *user, OrcInstruction *insn) @@ -547,7 +606,7 @@ sse_rule_convsuslw (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_mulswl (OrcCompiler *p, void *user, OrcInstruction *insn) { - int src = p->vars[insn->src_args[0]].alloc; + int src = p->vars[insn->src_args[1]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; int tmp = p->tmpreg; @@ -557,6 +616,185 @@ sse_rule_mulswl (OrcCompiler *p, void *user, OrcInstruction *insn) sse_emit_660f (p, "punpcklwd", 0x61, tmp, dest); } +static void +sse_rule_maxuw_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + x86_emit_mov_imm_reg (p, 4, 0x80008000, X86_ECX); + + ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(tmp)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_reg (p, X86_ECX, tmp); + + ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(tmp), + x86_get_regname_sse(tmp)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, tmp, tmp); + *p->codeptr++ = 0x00; + + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + sse_emit_660f (p, "pmaxsw", 0xee, src, dest); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + +static void +sse_rule_minuw_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_load_imm (p, tmp, 0x80808080); + + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + sse_emit_660f (p, "pminsw", 0xea, src, dest); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + +static void +sse_rule_avgsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_load_imm (p, tmp, 0x80808080); + + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + sse_emit_660f (p, "pavgb", 0xe0, src, dest); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + +static void +sse_rule_avgsw_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_load_imm (p, tmp, 0x80008000); + + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + sse_emit_660f (p, "pavgw", 0xe3, src, dest); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + +static void +sse_rule_maxsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_660f (p, "movdqa", 0x6f, dest, tmp); + sse_emit_660f (p, "pcmpgtb", 0x64, src, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); +} + +static void +sse_rule_minsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_660f (p, "movdqa", 0x6f, src, tmp); + sse_emit_660f (p, "pcmpgtb", 0x64, dest, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); +} + +static void +sse_rule_maxsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_660f (p, "movdqa", 0x6f, dest, tmp); + sse_emit_660f (p, "pcmpgtd", 0x66, src, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); +} + +static void +sse_rule_minsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_660f (p, "movdqa", 0x6f, src, tmp); + sse_emit_660f (p, "pcmpgtd", 0x66, dest, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); +} + +static void +sse_rule_maxul_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_load_imm (p, tmp, 0x80000000); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + + sse_emit_660f (p, "movdqa", 0x6f, dest, tmp); + sse_emit_660f (p, "pcmpgtd", 0x66, src, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); + + sse_emit_load_imm (p, tmp, 0x80000000); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + +static void +sse_rule_minul_slow (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[1]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = p->tmpreg; + + sse_emit_load_imm (p, tmp, 0x80000000); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); + + sse_emit_660f (p, "movdqa", 0x6f, src, tmp); + sse_emit_660f (p, "pcmpgtd", 0x66, dest, tmp); + sse_emit_660f (p, "pand", 0xdb, tmp, dest); + sse_emit_660f (p, "pandn", 0xdf, src, tmp); + sse_emit_660f (p, "por", 0xeb, tmp, dest); + + sse_emit_load_imm (p, tmp, 0x80000000); + sse_emit_660f (p, "pxor", 0xef, tmp, src); + sse_emit_660f (p, "pxor", 0xef, tmp, dest); +} + + void orc_compiler_sse_register_rules (OrcTarget *target) { @@ -632,23 +870,34 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "convswl", sse_rule_convswl, NULL); orc_rule_register (rule_set, "convuwl", sse_rule_convuwl, NULL); orc_rule_register (rule_set, "convssslw", sse_rule_convssslw, NULL); - orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL); - orc_rule_register (rule_set, "convlw", sse_rule_convlw, NULL); orc_rule_register (rule_set, "mulswl", sse_rule_mulswl, NULL); + /* slow rules */ + orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL); + orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL); + orc_rule_register (rule_set, "avgsb", sse_rule_avgsb_slow, NULL); + orc_rule_register (rule_set, "avgsw", sse_rule_avgsw_slow, NULL); + orc_rule_register (rule_set, "maxsb", sse_rule_maxsb_slow, NULL); + orc_rule_register (rule_set, "minsb", sse_rule_minsb_slow, NULL); + orc_rule_register (rule_set, "maxsl", sse_rule_maxsl_slow, NULL); + orc_rule_register (rule_set, "minsl", sse_rule_minsl_slow, NULL); + orc_rule_register (rule_set, "maxul", sse_rule_maxul_slow, NULL); + orc_rule_register (rule_set, "minul", sse_rule_minul_slow, NULL); + /* SSE 3 -- no rules */ /* SSSE 3 */ rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); - REG(signb); - REG(signw); - REG(signl); + orc_rule_register (rule_set, "signb", sse_rule_signX, (void *)0); + orc_rule_register (rule_set, "signw", sse_rule_signX, (void *)1); + orc_rule_register (rule_set, "signl", sse_rule_signX, (void *)2); REG(absb); REG(absw); REG(absl); +if (0) { /* SSE 4.1 */ rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); @@ -661,6 +910,9 @@ orc_compiler_sse_register_rules (OrcTarget *target) REG(minsl); REG(minul); REG(mulll); + orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL); + orc_rule_register (rule_set, "convlw", sse_rule_convlw, NULL); +} /* SSE 4.2 -- no rules */ diff --git a/orc/orcutils.h b/orc/orcutils.h index 3114e4b..511773f 100644 --- a/orc/orcutils.h +++ b/orc/orcutils.h @@ -49,8 +49,8 @@ typedef unsigned int orc_bool; #ifndef MAX #define MAX(a,b) ((a)>(b) ? (a) : (b)) #endif -#ifndef CLAMP -#define CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x))) +#ifndef ORC_CLAMP +#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x))) #endif #define ROUND_UP_SHIFT(x,y) (((x) + (1<<(y)) - 1)>>(y)) #define ROUND_UP_POW2(x,y) (((x) + (1<<(y)) - 1)&((~0)<<(y))) diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index d5b47ed..90c3e6b 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -1,8 +1,8 @@ -TESTS = test1 test2 test3 test4 test5 +TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution -noinst_PROGRAMS = test1 test2 test3 test4 test5 +noinst_PROGRAMS = test1 test2 test3 test4 test5 test_local_opcode_execution AM_CFLAGS = $(ORC_CFLAGS) -LIBS = $(ORC_LIBS) +LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la diff --git a/testsuite/test5.c b/testsuite/test5.c index a4968e4..6f13eab 100644 --- a/testsuite/test5.c +++ b/testsuite/test5.c @@ -9,7 +9,7 @@ int error = FALSE; -void test_opcode (const char *name); +void test_opcode (OrcStaticOpcode *opcode); int main (int argc, char *argv[]) @@ -28,7 +28,7 @@ main (int argc, char *argv[]) opcode_set->opcodes[i].src_size[0], opcode_set->opcodes[i].src_size[1], opcode_set->opcodes[i].emulate); - test_opcode (opcode_set->opcodes[i].name); + test_opcode (opcode_set->opcodes + i); } if (error) return 1; @@ -36,7 +36,7 @@ main (int argc, char *argv[]) } void -test_opcode (const char *name) +test_opcode (OrcStaticOpcode *opcode) { OrcProgram *p; char s[40]; @@ -44,13 +44,18 @@ test_opcode (const char *name) int ret; FILE *file; - p = orc_program_new_dss (2,2,2); + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } - sprintf(s, "test_%s", name); + sprintf(s, "test_%s", opcode->name); orc_program_set_name (p, s); - orc_program_add_constant (p, 2, 1, "c1"); + //orc_program_add_constant (p, 2, 1, "c1"); - orc_program_append_str (p, name, "d1", "s1", "c1"); + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); ret = orc_program_compile (p); if (!ret) { diff --git a/testsuite/test_local_opcode_execution.c b/testsuite/test_local_opcode_execution.c new file mode 100644 index 0000000..8cbbce0 --- /dev/null +++ b/testsuite/test_local_opcode_execution.c @@ -0,0 +1,65 @@ + +#include "config.h" + +#include + +#include +#include + + +int error = FALSE; + +void test_opcode (OrcStaticOpcode *opcode); + +int +main (int argc, char *argv[]) +{ + int i; + OrcOpcodeSet *opcode_set; + + orc_test_init(); + orc_init(); + + opcode_set = orc_opcode_set_get ("sys"); + + for(i=0;in_opcodes;i++){ + printf("/* %s %d,%d,%d */\n", + opcode_set->opcodes[i].name, + opcode_set->opcodes[i].dest_size[0], + opcode_set->opcodes[i].src_size[0], + opcode_set->opcodes[i].src_size[1]); + test_opcode (opcode_set->opcodes + i); + } + + if (error) return 1; + return 0; +} + +void +test_opcode (OrcStaticOpcode *opcode) +{ + OrcProgram *p; + char s[40]; + int ret; + + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } + + sprintf(s, "test_%s", opcode->name); + orc_program_set_name (p, s); + + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + + ret = orc_test_compare_output (p); + if (!ret) { + error = TRUE; + } + + orc_program_free (p); +} + + diff --git a/tools/orcc.c b/tools/orcc.c index 8c8324f..d18ad11 100644 --- a/tools/orcc.c +++ b/tools/orcc.c @@ -23,10 +23,18 @@ main (int argc, char *argv[]) n = orc_parse (code, &programs); +#if 1 for(i=0;iname); orc_test_gcc_compile (programs[i]); } +#endif +#if 0 + for(i=0;i