#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
void
{
orc_init ();
+ setvbuf (stdout, NULL, _IONBF, 0);
}
}
+/*
+ * Fill the first n_bytes bytes of data with pseudo-random values.
+ *
+ * Each byte receives the low 8 bits of one random() call.  Nothing is
+ * written when n_bytes is zero or negative.
+ */
+void
+orc_test_random_bits (void *data, int n_bytes)
+{
+  uint8_t *cursor = data;
+  int remaining;
+
+  for (remaining = n_bytes; remaining > 0; remaining--) {
+    *cursor++ = random();
+  }
+}
+
+/*
+ * Print element i of array, interpreted as a signed integer of the given
+ * byte size (1, 2 or 4), using a column width suited to that size.
+ *
+ * Returns the element value that was printed.  For any other size nothing
+ * is printed and -1 is returned.
+ */
+static int
+print_array_val (void *array, int size, int i)
+{
+  int value;
+
+  if (size == 1) {
+    value = ((int8_t *) array)[i];
+    printf(" %4d", value);
+  } else if (size == 2) {
+    value = ((int16_t *) array)[i];
+    printf(" %5d", value);
+  } else if (size == 4) {
+    value = ((int32_t *) array)[i];
+    printf(" %10d", value);
+  } else {
+    value = -1;
+  }
+
+  return value;
+}
+
+/*
+ * Compile program, run it once through the compiled code and once through
+ * the emulator on the same inputs, and compare the two destination arrays.
+ *
+ * Every source variable is given an array of n (64) elements filled by
+ * orc_test_random_bits(), every parameter is set to 2, and both destination
+ * buffers are pre-filled with 0xa5 before the runs.  On a mismatch a table
+ * is printed with one row per element (source values, emulated result,
+ * executed result, and '*' on rows that differ), followed by the generated
+ * assembly.
+ *
+ * Returns TRUE when both outputs are identical, or when the program does not
+ * compile (there is nothing to compare in that case).  Returns FALSE on a
+ * mismatch, when the program has no destination variable, or when a buffer
+ * cannot be allocated.
+ *
+ * The executor and all buffers allocated here are released on every path
+ * after compilation succeeds.
+ * NOTE(review): assumes orc_executor_free() does not itself free the arrays
+ * passed to orc_executor_set_array() -- confirm against the executor code.
+ */
+int
+orc_test_compare_output (OrcProgram *program)
+{
+  OrcExecutor *ex;
+  int result = FALSE;
+  int n = 64;
+  int dest_index = -1;
+  int dest_bytes;
+  void **src_data = NULL;
+  void *dest_exec = NULL;
+  void *dest_emul = NULL;
+  int i;
+
+  if (!orc_program_compile (program)) {
+    return TRUE;
+  }
+
+  ex = orc_executor_new (program);
+  orc_executor_set_n (ex, n);
+
+  /* One slot per variable so the source buffers can be freed afterwards. */
+  if (program->n_vars > 0) {
+    src_data = calloc (program->n_vars, sizeof (*src_data));
+    if (src_data == NULL) {
+      goto out;
+    }
+  }
+
+  for(i=0;i<program->n_vars;i++){
+    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
+      int src_bytes = n*program->vars[i].size;
+
+      src_data[i] = malloc (src_bytes);
+      if (src_data[i] == NULL && src_bytes > 0) {
+        goto out;
+      }
+      orc_test_random_bits (src_data[i], src_bytes);
+      orc_executor_set_array (ex, i, src_data[i]);
+    } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
+      dest_index = i;
+    } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
+      orc_executor_set_parameter (ex, i, 2);
+    }
+  }
+  if (dest_index == -1) {
+    goto out;
+  }
+
+  dest_bytes = n*program->vars[dest_index].size;
+  dest_exec = malloc (dest_bytes);
+  dest_emul = malloc (dest_bytes);
+  if (dest_bytes > 0 && (dest_exec == NULL || dest_emul == NULL)) {
+    goto out;
+  }
+  if (dest_bytes > 0) {
+    /* Identical fill so that elements neither run writes still compare equal. */
+    memset (dest_exec, 0xa5, dest_bytes);
+    memset (dest_emul, 0xa5, dest_bytes);
+  }
+
+  orc_executor_set_array (ex, dest_index, dest_exec);
+  orc_executor_run (ex);
+
+  orc_executor_set_array (ex, dest_index, dest_emul);
+  orc_executor_emulate (ex);
+
+  if (dest_bytes <= 0 || memcmp (dest_exec, dest_emul, dest_bytes) == 0) {
+    result = TRUE;
+    goto out;
+  }
+
+  for(i=0;i<n;i++){
+    int a,b;
+    int j;
+
+    printf("%2d:", i);
+
+    for(j=0;j<program->n_vars;j++){
+      if (program->vars[j].vartype == ORC_VAR_TYPE_SRC &&
+          program->vars[j].size > 0) {
+        print_array_val (ex->arrays[j], program->vars[j].size, i);
+      }
+    }
+
+    printf(" ->");
+    a = print_array_val (dest_emul, program->vars[dest_index].size, i);
+    b = print_array_val (dest_exec, program->vars[dest_index].size, i);
+
+    if (a != b) {
+      printf(" *");
+    }
+
+    printf("\n");
+  }
+
+  printf("%s", orc_program_get_asm_code (program));
+
+out:
+  orc_executor_free (ex);
+  if (src_data != NULL) {
+    for(i=0;i<program->n_vars;i++){
+      free (src_data[i]);
+    }
+    free (src_data);
+  }
+  free (dest_exec);
+  free (dest_emul);
+
+  return result;
+}
+
+
void orc_test_init (void);
int orc_test_gcc_compile (OrcProgram *p);
+void orc_test_random_bits (void *data, int n_bytes);
+int orc_test_compare_output (OrcProgram *program);
+
ORC_END_DECLS
OrcStaticOpcode *opcode;
OrcOpcodeExecutor opcode_ex;
+ memset (&opcode_ex, 0, sizeof(opcode_ex));
+
for(i=0;i<ex->n;i++){
for(j=0;j<program->n_insns;j++){
insn = program->insns + j;
void *ptr = ex->arrays[insn->src_args[k]] +
program->vars[insn->src_args[k]].size*i;
+ if (opcode->src_size[k] == 0) continue;
+
switch (program->vars[insn->src_args[k]].size) {
case 1:
- opcode_ex.values[k] = *(int8_t *)ptr;
+ opcode_ex.src_values[k] = *(int8_t *)ptr;
break;
case 2:
- opcode_ex.values[k] = *(int16_t *)ptr;
+ opcode_ex.src_values[k] = *(int16_t *)ptr;
break;
case 4:
- opcode_ex.values[k] = *(int32_t *)ptr;
+ opcode_ex.src_values[k] = *(int32_t *)ptr;
break;
default:
- ORC_ERROR("ack");
+ ORC_ERROR("unhandled size %d", program->vars[insn->src_args[k]].size);
}
}
opcode->emulate (&opcode_ex, opcode->emulate_user);
+#if 0
+ ORC_ERROR("emulate %s: %d %d -> %d",
+ opcode->name, opcode_ex.src_values[0], opcode_ex.src_values[1],
+ opcode_ex.dest_values[0]);
+#endif
for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++){
void *ptr = ex->arrays[insn->dest_args[k]] +
program->vars[insn->dest_args[k]].size*i;
+ if (opcode->dest_size[k] == 0) continue;
+
switch (program->vars[insn->dest_args[k]].size) {
case 1:
- *(int8_t *)ptr = opcode_ex.values[k];
+ *(int8_t *)ptr = opcode_ex.dest_values[k];
break;
case 2:
- *(int16_t *)ptr = opcode_ex.values[k];
+ *(int16_t *)ptr = opcode_ex.dest_values[k];
break;
case 4:
- *(int32_t *)ptr = opcode_ex.values[k];
+ *(int32_t *)ptr = opcode_ex.dest_values[k];
break;
default:
- ORC_ERROR("ack");
+ ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size);
}
}
}
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
-#define ORC_CLAMP_SB(x) CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
-#define ORC_CLAMP_UB(x) CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
-#define ORC_CLAMP_SW(x) CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
-#define ORC_CLAMP_UW(x) CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
-#define ORC_CLAMP_SL(x) CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
-#define ORC_CLAMP_UL(x) CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
+#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
+#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
+#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
+#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
+#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
+#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
void
static void
convsbw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (int8_t)(ex->values[1]);
+ ex->dest_values[0] = (int8_t)(ex->src_values[0]);
}
static void
convubw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (uint8_t)(ex->values[1]);
+ ex->dest_values[0] = (uint8_t)(ex->src_values[0]);
}
static void
convswl (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (int8_t)(ex->values[1]);
+ ex->dest_values[0] = (int16_t)(ex->src_values[0]);
}
static void
convuwl (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (uint8_t)(ex->values[1]);
+ ex->dest_values[0] = (uint16_t)(ex->src_values[0]);
}
static void
convwb (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (int16_t)(ex->values[1]);
+ ex->dest_values[0] = (int16_t)(ex->src_values[0]);
}
static void
convssswb (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_SB((int16_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_SB((int16_t)(ex->src_values[0]));
}
static void
convsuswb (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_UB((int16_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_UB((int16_t)(ex->src_values[0]));
}
static void
convusswb (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_SB((uint16_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_SB((uint16_t)(ex->src_values[0]));
}
static void
convuuswb (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_UB((uint16_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_UB((uint16_t)(ex->src_values[0]));
}
static void
convlw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = (int32_t)(ex->values[1]);
+ ex->dest_values[0] = (int32_t)(ex->src_values[0]);
}
static void
convssslw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_SW((int32_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_SW((int32_t)(ex->src_values[0]));
}
static void
convsuslw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_UW((int32_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_UW((int32_t)(ex->src_values[0]));
}
static void
convusslw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_SW((uint32_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_SW((uint32_t)(ex->src_values[0]));
}
static void
convuuslw (OrcOpcodeExecutor *ex, void *user)
{
- ex->values[0] = ORC_CLAMP_UW((uint32_t)(ex->values[1]));
+ ex->dest_values[0] = ORC_CLAMP_UW((uint32_t)(ex->src_values[0]));
}
#define UNARY(name,type,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
- int a = ex->values[1]; \
- ex->values[0] = ( type )( code ); \
+ int a = ex->src_values[0]; \
+ ex->dest_values[0] = ( type )( code ); \
}
#define BINARY(name,type,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
- int a = ex->values[1]; \
- int b = ex->values[2]; \
- ex->values[0] = ( type )( code ); \
-}
-
-#define BINARY_U(name,type,code) \
-static void \
-name (OrcOpcodeExecutor *ex, void *user) \
-{ \
- unsigned int a = ex->values[1]; \
- unsigned int b = ex->values[2]; \
- ex->values[0] = ( type )( code ); \
+ int a = (type) ex->src_values[0]; \
+ int b = (type) ex->src_values[1]; \
+ ex->dest_values[0] = ( type )( code ); \
}
#define UNARY_SB(name,code) UNARY(name, int8_t, code)
UNARY_SB(absb, (a<0)?-a:a)
BINARY_SB(addb, a + b)
-BINARY_SB(addssb, CLAMP(ORC_SB_MIN,ORC_SB_MAX,a + b))
-BINARY_UB(addusb, CLAMP(0,ORC_UB_MAX,a + b))
+BINARY_SB(addssb, ORC_CLAMP_SB(a + b))
+BINARY_UB(addusb, ORC_CLAMP_UB(a + b))
BINARY_SB(andb, a & b)
-BINARY_SB(andnb, a & (~b))
+BINARY_SB(andnb, (~a) & b)
BINARY_SB(avgsb, (a + b + 1)>>1)
BINARY_UB(avgub, (a + b + 1)>>1)
BINARY_SB(cmpeqb, (a == b) ? (~0) : 0)
BINARY_SB(orb, a | b)
BINARY_SB(shlb, a << b)
BINARY_SB(shrsb, a >> b)
-BINARY_UB(shrub, ((uint8_t)a) >> b)
-UNARY_SB(signb, CLAMP(-1,1,a))
+BINARY_UB(shrub, (a) >> b)
+UNARY_SB(signb, ORC_CLAMP(a,-1,1))
BINARY_SB(subb, a - b)
-BINARY_SB(subssb, CLAMP(ORC_SB_MIN,ORC_SB_MAX,a - b))
-BINARY_UB(subusb, CLAMP(0,ORC_UB_MAX,(uint8_t)a - (uint8_t)b))
+BINARY_SB(subssb, ORC_CLAMP_SB(a - b))
+BINARY_UB(subusb, ORC_CLAMP_UB(a - b))
BINARY_SB(xorb, a ^ b)
UNARY_SW(absw, (a<0)?-a:a)
BINARY_SW(addw, a + b)
-BINARY_SW(addssw, CLAMP(ORC_SW_MIN,ORC_SW_MAX,a + b))
-BINARY_UW(addusw, CLAMP(0,ORC_UW_MAX,a + b))
+BINARY_SW(addssw, ORC_CLAMP_SW(a + b))
+BINARY_UW(addusw, ORC_CLAMP_UW(a + b))
BINARY_SW(andw, a & b)
-BINARY_SW(andnw, a & (~b))
+BINARY_SW(andnw, (~a) & b)
BINARY_SW(avgsw, (a + b + 1)>>1)
BINARY_UW(avguw, (a + b + 1)>>1)
BINARY_SW(cmpeqw, (a == b) ? (~0) : 0)
BINARY_SW(shlw, a << b)
BINARY_SW(shrsw, a >> b)
BINARY_UW(shruw, a >> b)
-UNARY_SW(signw, CLAMP(-1,1,a))
+UNARY_SW(signw, ORC_CLAMP(a,-1,1))
BINARY_SW(subw, a - b)
-BINARY_SW(subssw, CLAMP(ORC_SW_MIN,ORC_SW_MAX,a - b))
-BINARY_UW(subusw, CLAMP(0,ORC_UW_MAX,a - b))
+BINARY_SW(subssw, ORC_CLAMP_SW(a - b))
+BINARY_UW(subusw, ORC_CLAMP_UW(a - b))
BINARY_SW(xorw, a ^ b)
UNARY_SL(absl, (a<0)?-a:a)
BINARY_SL(addl, a + b)
-BINARY_SL(addssl, CLAMP(ORC_SL_MIN,ORC_SL_MAX,(int64_t)a + (int64_t)b))
-BINARY_UL(addusl, CLAMP(0,ORC_UL_MAX,(uint64_t)a + (uint64_t)b))
+BINARY_SL(addssl, ORC_CLAMP_SL((int64_t)a + (int64_t)b))
+BINARY_UL(addusl, ORC_CLAMP_UL((uint64_t)a + (uint64_t)b))
BINARY_SL(andl, a & b)
-BINARY_SL(andnl, a & (~b))
+BINARY_SL(andnl, (~a) & b)
BINARY_SL(avgsl, (a + b + 1)>>1)
BINARY_UL(avgul, (a + b + 1)>>1)
BINARY_SL(cmpeql, (a == b) ? (~0) : 0)
BINARY_SL(cmpgtsl, (a > b) ? (~0) : 0)
UNARY_SL(copyl, a)
BINARY_SL(maxsl, (a > b) ? a : b)
-BINARY_UL(maxul, (a > b) ? a : b)
+BINARY_UL(maxul, ((uint32_t)a > (uint32_t)b) ? a : b)
BINARY_SL(minsl, (a < b) ? a : b)
-BINARY_UL(minul, (a < b) ? a : b)
+BINARY_UL(minul, ((uint32_t)a < (uint32_t)b) ? a : b)
BINARY_SL(mulll, (a * b) & 0xffffffff)
BINARY_SL(mulhsl, ((int64_t)a * (int64_t)b) >> 32)
BINARY_UL(mulhul, ((uint64_t)a * (uint64_t)b) >> 32)
BINARY_SL(shll, a << b)
BINARY_SL(shrsl, a >> b)
BINARY_UL(shrul, a >> b)
-UNARY_SL(signl, CLAMP(-1,1,a))
+UNARY_SL(signl, ORC_CLAMP(a,-1,1))
BINARY_SL(subl, a - b)
-BINARY_SL(subssl, CLAMP(ORC_SL_MIN,ORC_SL_MAX,(int64_t)a - (int64_t)b))
-BINARY_UL(subusl, CLAMP(0,ORC_UL_MAX,(uint64_t)a - (uint64_t)b))
+BINARY_SL(subssl, ORC_CLAMP_SL((int64_t)a - (int64_t)b))
+BINARY_UL(subusl, ORC_CLAMP_UL((uint64_t)a - (uint64_t)b))
BINARY_SL(xorl, a ^ b)
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
- ex->values[0] = ((type2)(type1)ex->values[1]) * \
- ((type2)(type1)ex->values[2]); \
+ ex->dest_values[0] = ((type2)(type1)ex->src_values[0]) * \
+ ((type2)(type1)ex->src_values[1]); \
}
MUL(mulsbw, int8_t, int16_t)
{ "convusswb", convusswb, NULL, { 1 }, { 2 } },
{ "convuuswb", convuuswb, NULL, { 1 }, { 2 } },
- { "convlw", convlw, NULL, { 1 }, { 2 } },
- { "convssslw", convssslw, NULL, { 1 }, { 2 } },
- { "convsuslw", convsuslw, NULL, { 1 }, { 2 } },
- { "convusslw", convusslw, NULL, { 1 }, { 2 } },
- { "convuuslw", convuuslw, NULL, { 1 }, { 2 } },
+ { "convlw", convlw, NULL, { 2 }, { 4 } },
+ { "convssslw", convssslw, NULL, { 2 }, { 4 } },
+ { "convsuslw", convsuslw, NULL, { 2 }, { 4 } },
+ { "convusslw", convusslw, NULL, { 2 }, { 4 } },
+ { "convuuslw", convuuslw, NULL, { 2 }, { 4 } },
#ifdef ENABLE_64BIT
{ "convql", convql, NULL, { 4 }, { 8 } },
compiler->tmpreg = X86_XMM0;
compiler->valid_regs[compiler->tmpreg] = 0;
- compiler->loop_shift = 3;
+ switch (orc_program_get_max_var_size (compiler->program)) {
+ case 1:
+ compiler->loop_shift = 4;
+ break;
+ case 2:
+ compiler->loop_shift = 3;
+ break;
+ case 4:
+ compiler->loop_shift = 2;
+ break;
+ default:
+ ORC_ERROR("unhandled max var size %d",
+ orc_program_get_max_var_size (compiler->program));
+ break;
+ }
}
void
}
}
+/*
+ * Translate an element size in bytes into the matching shift count:
+ * 1 -> 0, 2 -> 1, 4 -> 2.
+ *
+ * Any other size is reported through ORC_ERROR and -1 is returned.
+ */
+int
+get_shift (int size)
+{
+  int shift = -1;
+
+  if (size == 1) {
+    shift = 0;
+  } else if (size == 2) {
+    shift = 1;
+  } else if (size == 4) {
+    shift = 2;
+  } else {
+    ORC_ERROR("bad size %d", size);
+  }
+
+  return shift;
+}
void
orc_compiler_sse_assemble (OrcCompiler *compiler)
{
- int dest_var = orc_compiler_get_dest (compiler);
+ int dest_var;
+ int dest_shift;
+
+ dest_var = orc_compiler_get_dest (compiler);
+ dest_shift = get_shift (compiler->vars[dest_var].size);
compiler->vars[dest_var].is_aligned = FALSE;
x86_emit_prologue (compiler);
if (compiler->loop_shift > 0) {
-
x86_emit_mov_imm_reg (compiler, 4, 16, X86_EAX);
x86_emit_sub_memoffset_reg (compiler, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[dest_var]),
x86_exec_ptr, X86_EAX);
x86_emit_and_imm_reg (compiler, 4, 15, X86_EAX);
- /* FIXME size shift */
- x86_emit_sar_imm_reg (compiler, 4, 1, X86_EAX);
+ x86_emit_sar_imm_reg (compiler, 4, dest_shift, X86_EAX);
x86_emit_cmp_reg_memoffset (compiler, 4, X86_EAX,
(int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr);
return program->asm_code;
}
+/*
+ * Return the largest size found among the program's variables, or 0 when
+ * the program has no variables (or none with a positive size).
+ */
+int
+orc_program_get_max_var_size (OrcProgram *program)
+{
+  int largest = 0;
+  int idx;
+
+  for (idx = 0; idx < program->n_vars; idx++) {
+    if (program->vars[idx].size > largest) {
+      largest = program->vars[idx].size;
+    }
+  }
+
+  return largest;
+}
+
+
};
struct _OrcOpcodeExecutor {
- int values[ORC_N_VARIABLES];
+ int src_values[ORC_STATIC_OPCODE_N_SRC];
+ int dest_values[ORC_STATIC_OPCODE_N_DEST];
};
struct _OrcExecutor {
void orc_target_register (OrcTarget *target);
OrcTarget *orc_target_get_by_name (const char *target_name);
+int orc_program_get_max_var_size (OrcProgram *program);
#endif
x86_emit_modrm_reg (p, src, dest);
}
+/*
+ * Emit code that loads the 32-bit immediate value into SSE register reg and
+ * replicates it: the value is first moved into ECX, copied into reg with
+ * movd, and then shuffled with "pshufd $0" using reg as both source and
+ * destination.
+ *
+ * ECX is overwritten by the emitted code.
+ */
+static void
+sse_emit_load_imm (OrcCompiler *p, int reg, int value)
+{
+ x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
+
+ /* movd %ecx, reg: opcode bytes 66 0f 6e followed by the modrm byte */
+ ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x66;
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x6e;
+ x86_emit_modrm_reg (p, X86_ECX, reg);
+
+ /* pshufd $0, reg, reg: opcode bytes 66 0f 70, modrm, then immediate 0 */
+ ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
+ x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x66;
+ *p->codeptr++ = 0x0f;
+ *p->codeptr++ = 0x70;
+ x86_emit_modrm_reg (p, reg, reg);
+ *p->codeptr++ = 0x00;
+}
+
+
/* sse rules */
void
x86_emit_modrm_reg (p, reg, reg);
} else if (value == 1) {
+ sse_emit_660f (p, "pcmpeqw", 0x75, reg, reg);
+#if 0
ORC_ASM_CODE(p," pcmpeqw %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x75;
x86_emit_modrm_reg (p, reg, reg);
+#endif
ORC_ASM_CODE(p," psrlw $15, %%%s\n", x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
static void
sse_emit_66_rex_0f (OrcCompiler *p, OrcInstruction *insn, int code,
- const char *insn_name)
+ const char *insn_name, int src, int dest)
{
ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
- x86_get_regname_sse(p->vars[insn->src_args[1]].alloc),
- x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
+ x86_get_regname_sse(src), x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, src, 0, dest);
*p->codeptr++ = 0x0f;
if (code & 0xff00) {
*p->codeptr++ = code >> 8;
} else {
*p->codeptr++ = code;
}
- x86_emit_modrm_reg (p, p->vars[insn->src_args[1]].alloc,
- p->vars[insn->dest_args[0]].alloc);
+ x86_emit_modrm_reg (p, src, dest);
}
#define UNARY(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- sse_emit_66_rex_0f (p, insn, code, insn_name); \
+ sse_emit_66_rex_0f (p, insn, code, insn_name, \
+ p->vars[insn->src_args[0]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
}
#define BINARY(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- sse_emit_66_rex_0f (p, insn, code, insn_name); \
+ sse_emit_66_rex_0f (p, insn, code, insn_name, \
+ p->vars[insn->src_args[1]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
}
//BINARY(mulhsb,"pmulhb",0xe5)
//BINARY(mulhub,"pmulhub",0xe4)
BINARY(orb,"por",0xeb)
-UNARY(signb,"psignb",0x3808)
+//UNARY(signb,"psignb",0x3808)
BINARY(subb,"psubb",0xf8)
BINARY(subssb,"psubsb",0xe8)
BINARY(subusb,"psubusb",0xd8)
BINARY(mulhsw,"pmulhw",0xe5)
BINARY(mulhuw,"pmulhuw",0xe4)
BINARY(orw,"por",0xeb)
-UNARY(signw,"psignw",0x3809)
+//UNARY(signw,"psignw",0x3809)
BINARY(subw,"psubw",0xf9)
BINARY(subssw,"psubsw",0xe9)
BINARY(subusw,"psubusw",0xd9)
//BINARY(mulhsl,"pmulhd",0xe5)
//BINARY(mulhul,"pmulhud",0xe4)
BINARY(orl,"por",0xeb)
-UNARY(signl,"psignd",0x380a)
+//UNARY(signl,"psignd",0x380a)
BINARY(subl,"psubd",0xfa)
//BINARY(subssl,"psubsd",0xe9)
//BINARY(subusl,"psubusd",0xd9)
BINARY(xorl,"pxor",0xef)
+/*
+ * Shared rule for signb / signw / signl.
+ *
+ * user selects the element width as a small integer smuggled through the
+ * pointer: 0 = bytes, 1 = words, 2 = dwords.  The destination register is
+ * loaded with the constant 1 in every element and then combined with the
+ * source through the matching psign instruction.
+ *
+ * Because dest is overwritten with the constant before psign reads the
+ * source, the source is first copied to the temporary register when source
+ * and destination share a register.
+ *
+ * The constant load goes through sse_emit_load_imm(), which emits the same
+ * mov / movd / pshufd sequence this rule previously spelled out inline.
+ * NOTE(review): the (int) cast of a pointer truncates on LP64 targets; it is
+ * harmless for the values 0..2 registered for this rule but may warn.
+ */
+static void
+sse_rule_signX (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  int imm_vals[] = { 0x01010101, 0x00010001, 0x00000001 };
+  const char * names[] = { "psignb", "psignw", "psignd" };
+  int codes[] = { 0x08, 0x09, 0x0a };
+  int width = (int)user;
+
+  if (src == dest) {
+    sse_emit_660f (p, "movdqa", 0x6f, src, p->tmpreg);
+    src = p->tmpreg;
+  }
+
+  sse_emit_load_imm (p, dest, imm_vals[width]);
+
+  sse_emit_660f38 (p, names[width], codes[width], src, dest);
+}
+
+
#if 0
static void
sse_rule_shlw (OrcCompiler *p, void *user, OrcInstruction *insn)
static void
sse_rule_mulswl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- int src = p->vars[insn->src_args[0]].alloc;
+ int src = p->vars[insn->src_args[1]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
int tmp = p->tmpreg;
sse_emit_660f (p, "punpcklwd", 0x61, tmp, dest);
}
+/*
+ * maxuw without a native unsigned 16-bit maximum: flip bit 15 of every word
+ * in both operands (XOR with 0x8000 per word) so that unsigned order maps
+ * onto signed order, take the signed maximum with pmaxsw, then flip the bits
+ * back in both registers so the source is left as it was found.
+ *
+ * The bias constant is loaded through sse_emit_load_imm(), matching the
+ * other *_slow rules; the emitted instructions are the same as the former
+ * inline mov / movd / pshufd sequence.
+ */
+static void
+sse_rule_maxuw_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  int tmp = p->tmpreg;
+
+  sse_emit_load_imm (p, tmp, 0x80008000);
+
+  sse_emit_660f (p, "pxor", 0xef, tmp, src);
+  sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+  sse_emit_660f (p, "pmaxsw", 0xee, src, dest);
+  sse_emit_660f (p, "pxor", 0xef, tmp, src);
+  sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+/*
+ * minuw without a native unsigned 16-bit minimum: flip bit 15 of every word
+ * in both operands so that unsigned order maps onto signed order, take the
+ * signed minimum with pminsw, then flip the bits back in both registers so
+ * the source is left as it was found.
+ *
+ * The bias must be 0x8000 per 16-bit word (0x80008000 per dword).  The byte
+ * pattern 0x80808080 would also flip bit 7 of each word and break the
+ * ordering: e.g. min(0x0000, 0x0080) would come out as 0x0080.
+ */
+static void
+sse_rule_minuw_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  int tmp = p->tmpreg;
+
+  sse_emit_load_imm (p, tmp, 0x80008000);
+
+  sse_emit_660f (p, "pxor", 0xef, tmp, src);
+  sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+  sse_emit_660f (p, "pminsw", 0xea, src, dest);
+  sse_emit_660f (p, "pxor", 0xef, tmp, src);
+  sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+/*
+ * avgsb built on the unsigned byte average: XOR both operands with 0x80 in
+ * every byte, apply pavgb, then XOR both registers with the same constant
+ * again.  The second pair of XORs also restores the source register.
+ */
+static void
+sse_rule_avgsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ /* tmp = 0x80 in every byte */
+ sse_emit_load_imm (p, tmp, 0x80808080);
+
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+ sse_emit_660f (p, "pavgb", 0xe0, src, dest);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+/*
+ * avgsw built on the unsigned word average: XOR both operands with 0x8000 in
+ * every 16-bit word, apply pavgw, then XOR both registers with the same
+ * constant again.  The second pair of XORs also restores the source register.
+ */
+static void
+sse_rule_avgsw_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ /* tmp = 0x8000 in every 16-bit word */
+ sse_emit_load_imm (p, tmp, 0x80008000);
+
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+ sse_emit_660f (p, "pavgw", 0xe3, src, dest);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+/*
+ * maxsb as a compare-and-select sequence.
+ *
+ * Reading the last two sse_emit_660f() arguments as (source, destination),
+ * the emitted code is: tmp = dest; tmp = per-byte mask of (dest > src);
+ * dest &= mask; tmp = ~mask & src; dest |= tmp.
+ * NOTE(review): operand order of sse_emit_660f() is assumed from its other
+ * uses in this file -- confirm against its definition.
+ */
+static void
+sse_rule_maxsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_660f (p, "movdqa", 0x6f, dest, tmp);
+ sse_emit_660f (p, "pcmpgtb", 0x64, src, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+}
+
+
+/*
+ * minsb as a compare-and-select sequence.
+ *
+ * Reading the last two sse_emit_660f() arguments as (source, destination),
+ * the emitted code is: tmp = src; tmp = per-byte mask of (src > dest);
+ * dest &= mask; tmp = ~mask & src; dest |= tmp.
+ * NOTE(review): operand order of sse_emit_660f() is assumed from its other
+ * uses in this file -- confirm against its definition.
+ */
+static void
+sse_rule_minsb_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_660f (p, "movdqa", 0x6f, src, tmp);
+ sse_emit_660f (p, "pcmpgtb", 0x64, dest, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+}
+
+
+/*
+ * maxsl as a compare-and-select sequence on 32-bit elements.
+ *
+ * Reading the last two sse_emit_660f() arguments as (source, destination),
+ * the emitted code is: tmp = dest; tmp = per-dword mask of (dest > src);
+ * dest &= mask; tmp = ~mask & src; dest |= tmp.
+ * NOTE(review): operand order of sse_emit_660f() is assumed from its other
+ * uses in this file -- confirm against its definition.
+ */
+static void
+sse_rule_maxsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_660f (p, "movdqa", 0x6f, dest, tmp);
+ sse_emit_660f (p, "pcmpgtd", 0x66, src, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+}
+
+
+/*
+ * minsl as a compare-and-select sequence on 32-bit elements.
+ *
+ * Reading the last two sse_emit_660f() arguments as (source, destination),
+ * the emitted code is: tmp = src; tmp = per-dword mask of (src > dest);
+ * dest &= mask; tmp = ~mask & src; dest |= tmp.
+ * NOTE(review): operand order of sse_emit_660f() is assumed from its other
+ * uses in this file -- confirm against its definition.
+ */
+static void
+sse_rule_minsl_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_660f (p, "movdqa", 0x6f, src, tmp);
+ sse_emit_660f (p, "pcmpgtd", 0x66, dest, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+}
+
+
+/*
+ * maxul: XOR both operands with 0x80000000 per dword, run the same
+ * pcmpgtd / pand / pandn / por selection sequence as sse_rule_maxsl_slow,
+ * then XOR both registers with 0x80000000 again, which also restores the
+ * source register.
+ */
+static void
+sse_rule_maxul_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_load_imm (p, tmp, 0x80000000);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+
+ /* tmp is reused as the comparison mask from here on */
+ sse_emit_660f (p, "movdqa", 0x6f, dest, tmp);
+ sse_emit_660f (p, "pcmpgtd", 0x66, src, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+
+ /* the constant in tmp was overwritten above, so load it again */
+ sse_emit_load_imm (p, tmp, 0x80000000);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+/*
+ * minul: XOR both operands with 0x80000000 per dword, run the same
+ * pcmpgtd / pand / pandn / por selection sequence as sse_rule_minsl_slow,
+ * then XOR both registers with 0x80000000 again, which also restores the
+ * source register.
+ */
+static void
+sse_rule_minul_slow (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[1]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = p->tmpreg;
+
+ sse_emit_load_imm (p, tmp, 0x80000000);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+
+ /* tmp is reused as the comparison mask from here on */
+ sse_emit_660f (p, "movdqa", 0x6f, src, tmp);
+ sse_emit_660f (p, "pcmpgtd", 0x66, dest, tmp);
+ sse_emit_660f (p, "pand", 0xdb, tmp, dest);
+ sse_emit_660f (p, "pandn", 0xdf, src, tmp);
+ sse_emit_660f (p, "por", 0xeb, tmp, dest);
+
+ /* the constant in tmp was overwritten above, so load it again */
+ sse_emit_load_imm (p, tmp, 0x80000000);
+ sse_emit_660f (p, "pxor", 0xef, tmp, src);
+ sse_emit_660f (p, "pxor", 0xef, tmp, dest);
+}
+
+
+
void
orc_compiler_sse_register_rules (OrcTarget *target)
{
orc_rule_register (rule_set, "convswl", sse_rule_convswl, NULL);
orc_rule_register (rule_set, "convuwl", sse_rule_convuwl, NULL);
orc_rule_register (rule_set, "convssslw", sse_rule_convssslw, NULL);
- orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL);
- orc_rule_register (rule_set, "convlw", sse_rule_convlw, NULL);
orc_rule_register (rule_set, "mulswl", sse_rule_mulswl, NULL);
+ /* slow rules */
+ orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL);
+ orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL);
+ orc_rule_register (rule_set, "avgsb", sse_rule_avgsb_slow, NULL);
+ orc_rule_register (rule_set, "avgsw", sse_rule_avgsw_slow, NULL);
+ orc_rule_register (rule_set, "maxsb", sse_rule_maxsb_slow, NULL);
+ orc_rule_register (rule_set, "minsb", sse_rule_minsb_slow, NULL);
+ orc_rule_register (rule_set, "maxsl", sse_rule_maxsl_slow, NULL);
+ orc_rule_register (rule_set, "minsl", sse_rule_minsl_slow, NULL);
+ orc_rule_register (rule_set, "maxul", sse_rule_maxul_slow, NULL);
+ orc_rule_register (rule_set, "minul", sse_rule_minul_slow, NULL);
+
/* SSE 3 -- no rules */
/* SSSE 3 */
rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target);
- REG(signb);
- REG(signw);
- REG(signl);
+ orc_rule_register (rule_set, "signb", sse_rule_signX, (void *)0);
+ orc_rule_register (rule_set, "signw", sse_rule_signX, (void *)1);
+ orc_rule_register (rule_set, "signl", sse_rule_signX, (void *)2);
REG(absb);
REG(absw);
REG(absl);
+if (0) {
/* SSE 4.1 */
rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target);
REG(minsl);
REG(minul);
REG(mulll);
+ orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL);
+ orc_rule_register (rule_set, "convlw", sse_rule_convlw, NULL);
+}
/* SSE 4.2 -- no rules */
#ifndef MAX
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#endif
-#ifndef CLAMP
-#define CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
+#ifndef ORC_CLAMP
+#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#endif
#define ROUND_UP_SHIFT(x,y) (((x) + (1<<(y)) - 1)>>(y))
#define ROUND_UP_POW2(x,y) (((x) + (1<<(y)) - 1)&((~0)<<(y)))
-TESTS = test1 test2 test3 test4 test5
+TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution
-noinst_PROGRAMS = test1 test2 test3 test4 test5
+noinst_PROGRAMS = test1 test2 test3 test4 test5 test_local_opcode_execution
AM_CFLAGS = $(ORC_CFLAGS)
-LIBS = $(ORC_LIBS)
+LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la
int error = FALSE;
-void test_opcode (const char *name);
+void test_opcode (OrcStaticOpcode *opcode);
int
main (int argc, char *argv[])
opcode_set->opcodes[i].src_size[0],
opcode_set->opcodes[i].src_size[1],
opcode_set->opcodes[i].emulate);
- test_opcode (opcode_set->opcodes[i].name);
+ test_opcode (opcode_set->opcodes + i);
}
if (error) return 1;
}
void
-test_opcode (const char *name)
+test_opcode (OrcStaticOpcode *opcode)
{
OrcProgram *p;
char s[40];
int ret;
FILE *file;
- p = orc_program_new_dss (2,2,2);
+ if (opcode->src_size[1] == 0) {
+ p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]);
+ } else {
+ p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0],
+ opcode->src_size[1]);
+ }
- sprintf(s, "test_%s", name);
+ sprintf(s, "test_%s", opcode->name);
orc_program_set_name (p, s);
- orc_program_add_constant (p, 2, 1, "c1");
+ //orc_program_add_constant (p, 2, 1, "c1");
- orc_program_append_str (p, name, "d1", "s1", "c1");
+ orc_program_append_str (p, opcode->name, "d1", "s1", "s2");
ret = orc_program_compile (p);
if (!ret) {
--- /dev/null
+
+#include "config.h"
+
+#include <stdio.h>
+
+#include <orc/orc.h>
+#include <orc-test/orctest.h>
+
+
+int error = FALSE;
+
+void test_opcode (OrcStaticOpcode *opcode);
+
+/*
+ * Run test_opcode() on every opcode of the "sys" opcode set, printing a
+ * header line with the opcode name and its destination/source sizes first.
+ *
+ * Exit status is 1 if any opcode set the global error flag, 0 otherwise.
+ */
+int
+main (int argc, char *argv[])
+{
+  OrcOpcodeSet *opcode_set;
+  int i;
+
+  orc_test_init();
+  orc_init();
+
+  opcode_set = orc_opcode_set_get ("sys");
+
+  for (i = 0; i < opcode_set->n_opcodes; i++) {
+    OrcStaticOpcode *opcode = opcode_set->opcodes + i;
+
+    printf("/* %s %d,%d,%d */\n",
+        opcode->name,
+        opcode->dest_size[0],
+        opcode->src_size[0],
+        opcode->src_size[1]);
+    test_opcode (opcode);
+  }
+
+  return error ? 1 : 0;
+}
+
+
+/*
+ * Build a one-instruction program around opcode and check it with
+ * orc_test_compare_output().
+ *
+ * The program has one destination and one or two sources, sized from the
+ * opcode's dest_size[0] / src_size[0] / src_size[1]; a src_size[1] of 0
+ * selects the single-source form.  The global error flag is set when the
+ * comparison reports failure.
+ *
+ * The program name is formatted with snprintf() so that a long opcode name
+ * is truncated instead of overflowing the 40-byte buffer.
+ * NOTE(review): "s2" is passed to orc_program_append_str() even for
+ * single-source programs -- confirm that the unused operand is ignored.
+ */
+void
+test_opcode (OrcStaticOpcode *opcode)
+{
+  OrcProgram *p;
+  char s[40];
+  int ret;
+
+  if (opcode->src_size[1] == 0) {
+    p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]);
+  } else {
+    p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0],
+        opcode->src_size[1]);
+  }
+
+  snprintf(s, sizeof(s), "test_%s", opcode->name);
+  orc_program_set_name (p, s);
+
+  orc_program_append_str (p, opcode->name, "d1", "s1", "s2");
+
+  ret = orc_test_compare_output (p);
+  if (!ret) {
+    error = TRUE;
+  }
+
+  orc_program_free (p);
+}
+
+
+
n = orc_parse (code, &programs);
+#if 1
for(i=0;i<n;i++){
printf("%s\n", programs[i]->name);
orc_test_gcc_compile (programs[i]);
}
+#endif
+#if 0
+ for(i=0;i<n;i++){
+ orc_program_compile_for_target (programs[i], orc_target_get_by_name("c"));
+ printf("%s", orc_program_get_asm_code (programs[i]));
+ }
+#endif
return 0;
}