From d5f1decc5c7cd864b1ea2d1945d519165f094d8f Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sat, 25 Jul 2009 22:44:54 -0700 Subject: [PATCH] preliminary 2-D support --- orc-test/Makefile.am | 11 ++- orc-test/orcarray.c | 105 ++++++++++++++++++++++++++++ orc-test/orcarray.h | 31 ++++++++ orc-test/orcrandom.c | 37 ++++++++++ orc-test/orcrandom.h | 21 ++++++ orc-test/orctest.c | 183 +++++++++++++++++++----------------------------- orc/orc.c | 3 + orc/orcexecutor.c | 194 +++++++++++++++++++++++++++++---------------------- orc/orcprogram-sse.c | 22 ++++++ orc/orcprogram.c | 14 ++++ orc/orcprogram.h | 38 ++++++++++ orc/orcutils.h | 1 + 12 files changed, 459 insertions(+), 201 deletions(-) create mode 100644 orc-test/orcarray.c create mode 100644 orc-test/orcarray.h create mode 100644 orc-test/orcrandom.c create mode 100644 orc-test/orcrandom.h diff --git a/orc-test/Makefile.am b/orc-test/Makefile.am index 71b286b..5883662 100644 --- a/orc-test/Makefile.am +++ b/orc-test/Makefile.am @@ -6,11 +6,16 @@ lib_LTLIBRARIES = liborc-test-@ORC_MAJORMINOR@.la liborc_test_@ORC_MAJORMINOR@_la_LIBADD = $(ORC_LIBS) liborc_test_@ORC_MAJORMINOR@_la_LDFLAGS = \ -no-undefined -export-symbols-regex 'orc_' -liborc_test_@ORC_MAJORMINOR@_la_CFLAGS = $(ORC_CFLAGS) +liborc_test_@ORC_MAJORMINOR@_la_CFLAGS = $(ORC_CFLAGS) \ + -DORC_ENABLE_UNSTABLE_API liborc_test_@ORC_MAJORMINOR@_la_SOURCES = \ - orctest.c + orctest.c \ + orcarray.c \ + orcrandom.c pkginclude_HEADERS = \ - orctest.h + orctest.h \ + orcarray.h \ + orcrandom.h diff --git a/orc-test/orcarray.c b/orc-test/orcarray.c new file mode 100644 index 0000000..235d33a --- /dev/null +++ b/orc-test/orcarray.c @@ -0,0 +1,105 @@ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +OrcArray * +orc_array_new (int n, int m, int element_size) +{ + OrcArray *ar; + + ar = malloc (sizeof(OrcArray)); + memset (ar, 0, sizeof(OrcArray)); + + ar->n = n; + ar->m = m; + ar->element_size = element_size; + + ar->stride = (n*element_size + 256); + ar->alloc_len = ar->stride * (m+32); + ar->alloc_data = malloc (ar->alloc_len); + + ar->data = ORC_PTR_OFFSET (ar->alloc_data, ar->stride * 32 + 128); + + return ar; +} + +void +orc_array_free (OrcArray *array) +{ + free (array->alloc_data); +} + +void +orc_array_set_pattern (OrcArray *array, int value) +{ + memset (array->alloc_data, value, array->alloc_len); +} + +void +orc_array_set_random (OrcArray *array, OrcRandom *context) +{ + orc_random_bits (context, array->alloc_data, array->alloc_len); +} + + +int +orc_array_compare (OrcArray *array1, OrcArray *array2) +{ + if (memcmp (array1->alloc_data, array2->alloc_data, + array1->alloc_len) == 0) { + return TRUE; + } + + return FALSE; +} + +int +orc_array_check_out_of_bounds (OrcArray *array) +{ + /* FIXME */ + + return TRUE; +} + +#if 0 +void +orc_array_print_compare (OrcArray *array1, OrcArray *array2) +{ + + for(j=0;jm;j++){ + for(i=0;in;i++){ + int a,b; + int j; + + printf("%2d %2d:", i, j); + + for(k=0;kvars[k].name == NULL) continue; + if (program->vars[k].vartype == ORC_VAR_TYPE_SRC && + program->vars[k].size > 0) { + print_array_val_signed (ex->arrays[k], program->vars[k].size, i); + } + } + + printf(" ->"); + a = print_array_val_signed (dest_emul[k], program->vars[k].size, i); + b = print_array_val_signed (dest_exec[k], program->vars[k].size, i); + + if (a != b) { + printf(" *"); + } + + printf("\n"); + } + } +} +#endif + diff --git a/orc-test/orcarray.h b/orc-test/orcarray.h new file mode 100644 index 0000000..b8f323a --- /dev/null +++ b/orc-test/orcarray.h @@ -0,0 +1,31 @@ + +#ifndef _ORC_ARRAY_H_ +#define _ORC_ARRAY_H_ + +#include +#include +#include +#include + +typedef struct _OrcArray OrcArray; +struct _OrcArray { + void *data; + int stride; + int element_size; + int n,m; + + void *alloc_data; + int alloc_len; +}; + +OrcArray *orc_array_new (int n, int m, int element_size); +void orc_array_free (OrcArray *array); + +void orc_array_set_pattern (OrcArray *array, int value); +void orc_array_set_random (OrcArray *array, OrcRandom *context); + +int orc_array_compare (OrcArray *array1, OrcArray *array2); +int orc_array_check_out_of_bounds (OrcArray *array); + +#endif + diff --git a/orc-test/orcrandom.c b/orc-test/orcrandom.c new file mode 100644 index 0000000..3b6d5fb --- /dev/null +++ b/orc-test/orcrandom.c @@ -0,0 +1,37 @@ + +#include "config.h" + +#include +#include +#include +#include + + + +void +orc_random_init (OrcRandom *context, int seed) +{ + + context->x = seed; + +} + + + +void +orc_random_bits (OrcRandom *context, void *data, int n_bytes) +{ + uint8_t *d = data; + int i; + for(i=0;ix = 1103515245*context->x + 12345; + d[i] = context->x>>16; + } +} + +unsigned int +orc_random (OrcRandom *context) +{ + context->x = 1103515245*context->x + 12345; + return context->x; +} diff --git a/orc-test/orcrandom.h b/orc-test/orcrandom.h new file mode 100644 index 0000000..ce7be74 --- /dev/null +++ b/orc-test/orcrandom.h @@ -0,0 +1,21 @@ + +#ifndef _ORC_RANDOM_H_ +#define _ORC_RANDOM_H_ + +#include + +ORC_BEGIN_DECLS + +typedef struct _OrcRandom OrcRandom; +struct _OrcRandom { + unsigned int x; +}; + +void orc_random_init (OrcRandom *context, int seed); +void orc_random_bits (OrcRandom *context, void *data, int n_bytes); +unsigned int orc_random (OrcRandom *context); + +ORC_END_DECLS + +#endif + diff --git a/orc-test/orctest.c b/orc-test/orctest.c index 1735171..190baea 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -2,15 +2,19 @@ #include "config.h" #include +#include +#include #include #include + #include #include #include -//#include +OrcRandom rand_context; + void orc_test_init (void) { @@ -18,7 +22,7 @@ orc_test_init (void) setvbuf (stdout, NULL, _IONBF, 0); - //srand (time(NULL)); + orc_random_init (&rand_context, 0x12345678); } @@ -130,61 +134,29 @@ orc_test_gcc_compile (OrcProgram *p) } -void -orc_test_random_bits (void *data, int n_bytes) -{ -#if 1 - uint8_t *d = data; - int i; - for(i=0;idata, + i*array->element_size + j*array->stride); + + switch (array->element_size) { case 1: - { - int8_t *a = array; - printf(" %4d", a[i]); - return a[i]; - } - break; + printf(" %4d", *(int8_t *)ptr); + return *(int8_t *)ptr; case 2: - { - int16_t *a = array; - printf(" %5d", a[i]); - return a[i]; - } - break; + printf(" %5d", *(int16_t *)ptr); + return *(int16_t *)ptr; case 4: - { - int32_t *a = array; - printf(" %10d", a[i]); - return a[i]; - } - break; + printf(" %10d", *(int32_t *)ptr); + return *(int32_t *)ptr; case 8: - { - int64_t *a = array; #ifdef HAVE_AMD64 - printf(" %20ld", a[i]); + printf(" %20ld", *(int64_t *)ptr); #else - printf(" %20lld", a[i]); + printf(" %20lld", *(int64_t *)ptr); #endif - return a[i]; - } - break; + return *(int64_t *)ptr; default: return -1; } @@ -296,41 +268,6 @@ print_array_val_float (void *array, int size, int i) } } -int delign_index = 1; - -void * -alloc_array (int n, int size, void **m_ptr) -{ - unsigned char *ptr = malloc (n*size+256*2); - memset (ptr, 0xa5, n*size+256*2); - if (m_ptr) *m_ptr = ptr; - - delign_index++; - delign_index &= 0xf; - - return (ptr + 256 + size*delign_index); -} - -int -check_bounds (void *ptr, int n, int size) -{ - unsigned char *data = ptr; - int i; - - for(i=0;i<100;i++){ - if (data[-1-i] != 0xa5) { - ORC_ERROR("early bounds failure at %d", i); - return FALSE; - } - if (data[n*size+i] != 0xa5) { - ORC_ERROR("late bounds failure at %d", i); - return FALSE; - } - } - - return TRUE; -} - static OrcTestResult orc_test_compare_output_full (OrcProgram *program, int backup); @@ -351,11 +288,10 @@ OrcTestResult orc_test_compare_output_full (OrcProgram *program, int backup) { OrcExecutor *ex; - int n = 64 + (rand()&0xf); - void *dest_exec[4] = { NULL, NULL, NULL, NULL }; - void *dest_emul[4] = { NULL, NULL, NULL, NULL }; - void *ptr_exec[4]; - void *ptr_emul[4]; + int n = 64 + (orc_random(&rand_context)&0xf); + OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL }; + OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL }; + OrcArray *src[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; int i; int k; int have_dest = FALSE; @@ -364,6 +300,8 @@ orc_test_compare_output_full (OrcProgram *program, int backup) int acc_exec = 0, acc_emul = 0; int ret = ORC_TEST_OK; + ORC_DEBUG ("got here"); + if (!backup) { result = orc_program_compile (program); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL(result)) { @@ -373,44 +311,61 @@ orc_test_compare_output_full (OrcProgram *program, int backup) ex = orc_executor_new (program); orc_executor_set_n (ex, n); + if (program->is_2d) { + orc_executor_set_m (ex, 1); + } else { + orc_executor_set_m (ex, 1); + } + ORC_DEBUG("size %d %d", ex->n, ex->params[ORC_VAR_A1]); for(i=0;ivars[i].name == NULL) continue; if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) { - uint8_t *data; - data = alloc_array (n,program->vars[i].size, NULL); - orc_test_random_bits (data, n*program->vars[i].size); - orc_executor_set_array (ex, i, data); + src[i] = orc_array_new (n, 1, program->vars[i].size); + orc_array_set_random (src[i], &rand_context); + orc_executor_set_array (ex, i, src[i]->data); } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) { - dest_exec[i] = alloc_array (n, program->vars[i].size, &ptr_exec[i]); - dest_emul[i] = alloc_array (n, program->vars[i].size, &ptr_emul[i]); - memset (dest_exec[i], 0xa5, n*program->vars[i].size); - memset (dest_emul[i], 0xa5, n*program->vars[i].size); - - orc_executor_set_array (ex, i, dest_exec[i]); - have_dest = TRUE; + dest_exec[i] = orc_array_new (n, 1, program->vars[i].size); + orc_array_set_pattern (dest_exec[i], 0xa5); + dest_emul[i] = orc_array_new (n, 1, program->vars[i].size); + orc_array_set_pattern (dest_emul[i], 0xa5); } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) { orc_executor_set_param (ex, i, 2); } } - orc_executor_run (ex); - for(i=0;ivars[i].vartype == ORC_VAR_TYPE_DEST) { - orc_executor_set_array (ex, i, dest_emul[i]); + orc_executor_set_array (ex, i, dest_exec[i]->data); + have_dest = TRUE; } + } + ORC_DEBUG ("running"); + orc_executor_run (ex); + ORC_DEBUG ("done running"); + for(i=0;ivars[i].vartype == ORC_VAR_TYPE_ACCUMULATOR) { acc_exec = ex->accumulators[0]; have_acc = TRUE; } } + + for(i=0;ivars[i].vartype == ORC_VAR_TYPE_DEST) { + orc_executor_set_array (ex, i, dest_emul[i]->data); + } + } orc_executor_emulate (ex); + for(i=0;ivars[i].vartype == ORC_VAR_TYPE_ACCUMULATOR) { + acc_emul = ex->accumulators[0]; + } + } for(k=0;kvars[k].vartype == ORC_VAR_TYPE_DEST) { - if (memcmp (dest_exec[k], dest_emul[k], n*program->vars[k].size) != 0) { + if (!orc_array_compare (dest_exec[k], dest_emul[k])) { for(i=0;ivars[j].name == NULL) continue; if (program->vars[j].vartype == ORC_VAR_TYPE_SRC && program->vars[j].size > 0) { - print_array_val_signed (ex->arrays[j], program->vars[j].size, i); + print_array_val_signed (ex->arrays[j], i, 0); } } printf(" ->"); - a = print_array_val_signed (dest_emul[k], program->vars[k].size, i); - b = print_array_val_signed (dest_exec[k], program->vars[k].size, i); + a = print_array_val_signed (dest_emul[k], i, 0); + b = print_array_val_signed (dest_exec[k], i, 0); if (a != b) { printf(" *"); @@ -438,17 +393,11 @@ orc_test_compare_output_full (OrcProgram *program, int backup) ret = ORC_TEST_FAILED; } - if (!check_bounds (dest_exec[k], n, program->vars[k].size)) { + if (!orc_array_check_out_of_bounds (dest_exec[k])) { printf("out of bounds failure\n"); ret = ORC_TEST_FAILED; } - - free (ptr_exec[k]); - free (ptr_emul[k]); - } - if (program->vars[k].vartype == ORC_VAR_TYPE_ACCUMULATOR) { - acc_emul = ex->accumulators[0]; } } @@ -478,6 +427,14 @@ orc_test_compare_output_full (OrcProgram *program, int backup) printf("%s", orc_program_get_asm_code (program)); } + for(i=0;i<4;i++){ + if (dest_exec[i]) orc_array_free (dest_exec[i]); + if (dest_emul[i]) orc_array_free (dest_emul[i]); + } + for(i=0;i<8;i++){ + if (src[i]) orc_array_free (src[i]); + } + orc_executor_free (ex); return ret; diff --git a/orc/orc.c b/orc/orc.c index 813cdc3..588aa29 100644 --- a/orc/orc.c +++ b/orc/orc.c @@ -6,6 +6,7 @@ #include #include +#include /** * SECTION:orc @@ -30,6 +31,8 @@ orc_init (void) _inited = 1; + ORC_ASSERT(sizeof(OrcExecutor) == sizeof(OrcExecutorAlt)); + _orc_debug_init(); orc_opcode_init(); orc_c_init(); diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c index db2f83b..585b31b 100644 --- a/orc/orcexecutor.c +++ b/orc/orcexecutor.c @@ -39,6 +39,8 @@ orc_executor_run (OrcExecutor *ex) { void (*func) (OrcExecutor *); + ORC_DEBUG("run"); + func = ex->program->code_exec; if (func) { func (ex); @@ -52,6 +54,11 @@ void orc_executor_set_program (OrcExecutor *ex, OrcProgram *program) { ex->program = program; + if (program->code_exec) { + ex->arrays[ORC_VAR_A1] = (void *)program->code_exec; + } else { + ex->arrays[ORC_VAR_A1] = (void *)orc_executor_emulate; + } } void @@ -103,11 +110,18 @@ orc_executor_set_n (OrcExecutor *ex, int n) } void +orc_executor_set_m (OrcExecutor *ex, int m) +{ + ORC_EXECUTOR_M(ex) = m; +} + +void orc_executor_emulate (OrcExecutor *ex) { int i; int j; int k; + int m, m_index; OrcProgram *program = ex->program; OrcInstruction *insn; OrcStaticOpcode *opcode; @@ -118,97 +132,107 @@ orc_executor_emulate (OrcExecutor *ex) ex->accumulators[2] = 0; ex->accumulators[3] = 0; + ORC_DEBUG("emulating"); + memset (&opcode_ex, 0, sizeof(opcode_ex)); - for(i=0;in;i++){ - for(j=0;jn_insns;j++){ - insn = program->insns + j; - opcode = insn->opcode; - - /* set up args */ - for(k=0;kvars + insn->src_args[k]; - - if (opcode->src_size[k] == 0) continue; - - if (var->vartype == ORC_VAR_TYPE_CONST) { - opcode_ex.src_values[k] = var->value; - } else if (var->vartype == ORC_VAR_TYPE_PARAM) { - opcode_ex.src_values[k] = ex->params[insn->src_args[k]]; - } else if (var->vartype == ORC_VAR_TYPE_TEMP) { - /* FIXME shouldn't store executor stuff in program */ - opcode_ex.src_values[k] = var->value; - } else if (var->vartype == ORC_VAR_TYPE_SRC || - var->vartype == ORC_VAR_TYPE_DEST) { - void *ptr = ex->arrays[insn->src_args[k]] + var->size*i; - - switch (var->size) { - case 1: - opcode_ex.src_values[k] = *(int8_t *)ptr; - break; - case 2: - opcode_ex.src_values[k] = *(int16_t *)ptr; - break; - case 4: - opcode_ex.src_values[k] = *(int32_t *)ptr; - break; - case 8: - opcode_ex.src_values[k] = *(int64_t *)ptr; - break; - default: - ORC_ERROR("unhandled size %d", program->vars[insn->src_args[k]].size); + if (program->is_2d) { + m = ORC_EXECUTOR_M(ex); + } else { + m = 1; + } + for(m_index=0;m_indexn;i++){ + for(j=0;jn_insns;j++){ + insn = program->insns + j; + opcode = insn->opcode; + + /* set up args */ + for(k=0;kvars + insn->src_args[k]; + + if (opcode->src_size[k] == 0) continue; + + if (var->vartype == ORC_VAR_TYPE_CONST) { + opcode_ex.src_values[k] = var->value; + } else if (var->vartype == ORC_VAR_TYPE_PARAM) { + opcode_ex.src_values[k] = ex->params[insn->src_args[k]]; + } else if (var->vartype == ORC_VAR_TYPE_TEMP) { + /* FIXME shouldn't store executor stuff in program */ + opcode_ex.src_values[k] = var->value; + } else if (var->vartype == ORC_VAR_TYPE_SRC || + var->vartype == ORC_VAR_TYPE_DEST) { + void *ptr = ORC_PTR_OFFSET(ex->arrays[insn->src_args[k]], + var->size*i + ex->params[insn->src_args[k]]*m_index); + + switch (var->size) { + case 1: + opcode_ex.src_values[k] = *(int8_t *)ptr; + break; + case 2: + opcode_ex.src_values[k] = *(int16_t *)ptr; + break; + case 4: + opcode_ex.src_values[k] = *(int32_t *)ptr; + break; + case 8: + opcode_ex.src_values[k] = *(int64_t *)ptr; + break; + default: + ORC_ERROR("unhandled size %d", program->vars[insn->src_args[k]].size); + } + } else { + ORC_ERROR("shouldn't be reached (%d)", var->vartype); } - } else { - ORC_ERROR("shouldn't be reached (%d)", var->vartype); } - } - opcode->emulate (&opcode_ex, opcode->emulate_user); - - for(k=0;kvars + insn->dest_args[k]; - - if (opcode->dest_size[k] == 0) continue; - - if (var->vartype == ORC_VAR_TYPE_TEMP) { - /* FIXME shouldn't store executor stuff in program */ - var->value = opcode_ex.dest_values[k]; - } else if (var->vartype == ORC_VAR_TYPE_DEST) { - void *ptr = ex->arrays[insn->dest_args[k]] + var->size*i; - - switch (var->size) { - case 1: - *(int8_t *)ptr = opcode_ex.dest_values[k]; - break; - case 2: - *(int16_t *)ptr = opcode_ex.dest_values[k]; - break; - case 4: - *(int32_t *)ptr = opcode_ex.dest_values[k]; - break; - case 8: - *(int64_t *)ptr = opcode_ex.dest_values[k]; - break; - default: - ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size); - } - } else if (var->vartype == ORC_VAR_TYPE_ACCUMULATOR) { - switch (var->size) { - case 2: - ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] += - opcode_ex.dest_values[k]; - ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] &= 0xffff; - break; - case 4: - ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] += - opcode_ex.dest_values[k]; - break; - default: - ORC_ERROR("unhandled size %d",program->vars[insn->dest_args[k]].size); + opcode->emulate (&opcode_ex, opcode->emulate_user); + + for(k=0;kvars + insn->dest_args[k]; + + if (opcode->dest_size[k] == 0) continue; + + if (var->vartype == ORC_VAR_TYPE_TEMP) { + /* FIXME shouldn't store executor stuff in program */ + var->value = opcode_ex.dest_values[k]; + } else if (var->vartype == ORC_VAR_TYPE_DEST) { + void *ptr = ORC_PTR_OFFSET(ex->arrays[insn->dest_args[k]], + var->size*i + ex->params[insn->dest_args[k]]*m_index); + + switch (var->size) { + case 1: + *(int8_t *)ptr = opcode_ex.dest_values[k]; + break; + case 2: + *(int16_t *)ptr = opcode_ex.dest_values[k]; + break; + case 4: + *(int32_t *)ptr = opcode_ex.dest_values[k]; + break; + case 8: + *(int64_t *)ptr = opcode_ex.dest_values[k]; + break; + default: + ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size); + } + } else if (var->vartype == ORC_VAR_TYPE_ACCUMULATOR) { + switch (var->size) { + case 2: + ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] += + opcode_ex.dest_values[k]; + ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] &= 0xffff; + break; + case 4: + ex->accumulators[insn->dest_args[k] - ORC_VAR_A1] += + opcode_ex.dest_values[k]; + break; + default: + ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size); + } + } else { + ORC_ERROR("shouldn't be reached (%d)", var->vartype); } - ex->accumulators[0] += opcode_ex.dest_values[k]; - } else { - ORC_ERROR("shouldn't be reached (%d)", var->vartype); } } } diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 3d3df75..cb8ad3d 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -405,6 +405,20 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) orc_x86_emit_prologue (compiler); + if (compiler->program->is_2d) { + orc_x86_emit_mov_memoffset_reg (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A1]), + compiler->exec_reg, X86_EAX); + orc_x86_emit_test_reg_reg (compiler, 4, X86_EAX, X86_EAX); + orc_x86_emit_jle (compiler, 17); + + orc_x86_emit_mov_reg_memoffset (compiler, 4, X86_EAX, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[ORC_VAR_A2]), + compiler->exec_reg); + + orc_x86_emit_label (compiler, 16); + } + if (compiler->loop_shift > 0) { orc_x86_emit_mov_imm_reg (compiler, 4, 16, X86_EAX); orc_x86_emit_sub_memoffset_reg (compiler, 4, @@ -521,6 +535,14 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) compiler->loop_shift = save_loop_shift; } + if (compiler->program->is_2d) { + orc_x86_emit_dec_memoffset (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A2]), + compiler->exec_reg); + orc_x86_emit_jne (compiler, 16); + orc_x86_emit_label (compiler, 17); + } + sse_save_accumulators (compiler); orc_x86_emit_epilogue (compiler); diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 02c24c5..c2efe81 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -170,6 +170,20 @@ orc_program_set_name (OrcProgram *program, const char *name) } /** + * orc_program_set_2d: + * @program: a pointer to an OrcProgram structure + * + * Sets a flag on the program indicating that arrays are two + * dimensional. This causes the compiler to generate code for + * an OrcExec2D executor. + */ +void +orc_program_set_2d (OrcProgram *program) +{ + program->is_2d = TRUE; +} + +/** * orc_program_set_backup_function: * @program: a pointer to an OrcProgram structure * @func: a function that performs the operations in the program diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 8e7f913..f6fb283 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -7,6 +7,7 @@ typedef struct _OrcOpcodeExecutor OrcOpcodeExecutor; typedef struct _OrcExecutor OrcExecutor; +typedef struct _OrcExecutorAlt OrcExecutorAlt; typedef struct _OrcVariable OrcVariable; typedef struct _OrcOpcodeSet OrcOpcodeSet; typedef struct _OrcStaticOpcode OrcStaticOpcode; @@ -26,6 +27,7 @@ typedef void (*OrcExecutorFunc)(OrcExecutor *ex); #define ORC_N_REGS (32*4) #define ORC_N_INSNS 100 #define ORC_N_VARIABLES 64 +#define ORC_N_ARRAYS 12 #define ORC_N_REGISTERS 20 #define ORC_N_FIXUPS 20 #define ORC_N_CONSTANTS 20 @@ -306,6 +308,9 @@ struct _OrcProgram { int code_size; void *backup_func; + int is_2d; + int constant_n; + int constant_m; }; /** @@ -385,8 +390,39 @@ struct _OrcExecutor { void *arrays[ORC_N_VARIABLES]; int params[ORC_N_VARIABLES]; int accumulators[4]; + /* exec pointer is stored in arrays[ORC_VAR_A1] */ + /* the stride for arrays[x] is stored in params[x] */ + /* m is stored in params[ORC_VAR_A1] */ + /* m_index is stored in params[ORC_VAR_A2] */ + /* elapsed time is stored in params[ORC_VAR_A3] */ }; +/* the alternate view of OrcExecutor */ +struct _OrcExecutorAlt { + /*< private >*/ + OrcProgram *program; + int n; + int counter1; + int counter2; + int counter3; + + void *arrays[ORC_N_ARRAYS]; + OrcExecutorFunc exec; + void *unused1[ORC_N_VARIABLES - ORC_N_ARRAYS - 1]; + int strides[ORC_N_ARRAYS]; + int m; + int m_index; + int time; + int unused2[ORC_VAR_P1-ORC_VAR_A4]; + int params[ORC_VAR_T1-ORC_VAR_P1]; + int unused3[ORC_N_VARIABLES - ORC_VAR_T1]; + int accumulators[4]; +}; +#define ORC_EXECUTOR_EXEC(ex) ((OrcExecutorFunc)((ex)->arrays[ORC_VAR_A1])) +#define ORC_EXECUTOR_M(ex) ((ex)->params[ORC_VAR_A1]) +#define ORC_EXECUTOR_M_INDEX(ex) ((ex)->params[ORC_VAR_A2]) +#define ORC_EXECUTOR_TIME(ex) ((ex)->params[ORC_VAR_A3]) + /** * OrcTarget: * @@ -419,6 +455,7 @@ void orc_opcode_init (void); const char * orc_program_get_name (OrcProgram *program); void orc_program_set_name (OrcProgram *program, const char *name); +void orc_program_set_2d (OrcProgram *program); void orc_program_append (OrcProgram *p, const char *opcode, int arg0, int arg1, int arg2); void orc_program_append_str (OrcProgram *p, const char *opcode, @@ -462,6 +499,7 @@ void orc_executor_set_param_str (OrcExecutor *ex, const char *name, int value); int orc_executor_get_accumulator (OrcExecutor *ex, int var); int orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name); void orc_executor_set_n (OrcExecutor *ex, int n); +void orc_executor_set_m (OrcExecutor *ex, int m); void orc_executor_emulate (OrcExecutor *ex); void orc_executor_run (OrcExecutor *ex); diff --git a/orc/orcutils.h b/orc/orcutils.h index 79ad8e1..74e8d67 100644 --- a/orc/orcutils.h +++ b/orc/orcutils.h @@ -52,6 +52,7 @@ typedef unsigned int orc_bool; #define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x))) #endif #define ORC_PTR_TO_INT(x) ((int)(long)(x)) +#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset))) #define ORC_READ_UINT32_LE(ptr) \ ((uint32_t)( \ -- 2.7.4