From: David Schleef Date: Wed, 30 Sep 2009 03:15:34 +0000 (-0700) Subject: Move 32-bit float ops to core library X-Git-Tag: merge-base~31 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=22bdbfc7aeddd66dc8e719f3a019387ddbf33b4e;p=platform%2Fupstream%2Forc.git Move 32-bit float ops to core library --- diff --git a/orc-test/orcarray.c b/orc-test/orcarray.c index ba100ec..2c20f4c 100644 --- a/orc-test/orcarray.c +++ b/orc-test/orcarray.c @@ -9,6 +9,7 @@ #include #include #include +#include #define EXTEND_ROWS 16 #define EXTEND_STRIDE 256 @@ -57,11 +58,29 @@ orc_array_set_random (OrcArray *array, OrcRandom *context) int -orc_array_compare (OrcArray *array1, OrcArray *array2) +orc_array_compare (OrcArray *array1, OrcArray *array2, int flags) { - if (memcmp (array1->alloc_data, array2->alloc_data, - array1->alloc_len) == 0) { + if (flags & ORC_TEST_FLAGS_FLOAT && array1->element_size == 4) { + int j; + for(j=0;j<array1->m;j++){ + float *a, *b; + int i; + + a = ORC_PTR_OFFSET (array1->data, j*array1->stride); + b = ORC_PTR_OFFSET (array2->data, j*array2->stride); + + for (i=0;i<array1->n;i++){ + if (!((isnan(a[i]) && isnan(b[i])) || a[i] == b[i])) { + return FALSE; + } + } + } return TRUE; + } else { + if (memcmp (array1->alloc_data, array2->alloc_data, + array1->alloc_len) == 0) { + return TRUE; + } } return FALSE; diff --git a/orc-test/orcarray.h b/orc-test/orcarray.h index b8f323a..d8c89cb 100644 --- a/orc-test/orcarray.h +++ b/orc-test/orcarray.h @@ -24,7 +24,7 @@ void orc_array_free (OrcArray *array); void orc_array_set_pattern (OrcArray *array, int value); void orc_array_set_random (OrcArray *array, OrcRandom *context); -int orc_array_compare (OrcArray *array1, OrcArray *array2); +int orc_array_compare (OrcArray *array1, OrcArray *array2, int flags); int orc_array_check_out_of_bounds (OrcArray *array); #endif diff --git a/orc-test/orcrandom.c b/orc-test/orcrandom.c index 3b6d5fb..7ada96d 100644 --- a/orc-test/orcrandom.c +++ b/orc-test/orcrandom.c @@ -29,6 +29,16 @@ 
orc_random_bits (OrcRandom *context, void *data, int n_bytes) } } +void +orc_random_floats (OrcRandom *context, float *data, int n) +{ + int i; + for(i=0;i<n;i++){ + context->x = 1103515245*context->x + 12345; + data[i] = (double)(context->x>>16) / 32768.0 - 1.0; + } +} + unsigned int orc_random (OrcRandom *context) { diff --git a/orc-test/orcrandom.h b/orc-test/orcrandom.h index ce7be74..838d267 100644 --- a/orc-test/orcrandom.h +++ b/orc-test/orcrandom.h @@ -13,6 +13,7 @@ struct _OrcRandom { void orc_random_init (OrcRandom *context, int seed); void orc_random_bits (OrcRandom *context, void *data, int n_bytes); +void orc_random_floats (OrcRandom *context, float *data, int n); unsigned int orc_random (OrcRandom *context); ORC_END_DECLS diff --git a/orc-test/orctest.c b/orc-test/orctest.c index 7f07990..8eba583 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -11,6 +11,7 @@ #include #include #include +#include OrcRandom rand_context; @@ -244,24 +245,25 @@ print_array_val_hex (void *array, int size, int i) } } -float -print_array_val_float (void *array, int size, int i) +int +print_array_val_float (OrcArray *array, int i, int j) { - switch (size) { + void *ptr = ORC_PTR_OFFSET (array->data, + i*array->element_size + j*array->stride); + + switch (array->element_size) { case 4: - { - float *a = array; - printf(" %g", a[i]); - return a[i]; + if (isnan(*(float *)ptr)) { + printf(" nan %08x", *(uint32_t *)ptr); + /* This is to get around signaling/non-signaling nans in the output */ + return (*(uint32_t *)ptr) & 0xffbfffff; + } else { + printf(" %12.5g", *(float *)ptr); + return *(int32_t *)ptr; } - break; case 8: - { - double *a = array; - printf(" %g", a[i]); - return a[i]; - } - break; + printf(" %12.5g", *(double *)ptr); + return *(int64_t *)ptr; default: printf(" ERROR"); return -1; @@ -269,7 +271,7 @@ print_array_val_float (void *array, int size, int i) } static OrcTestResult orc_test_compare_output_full (OrcProgram *program, - int backup); + int flags); OrcTestResult 
orc_test_compare_output (OrcProgram *program) @@ -280,12 +282,12 @@ orc_test_compare_output (OrcProgram *program) OrcTestResult orc_test_compare_output_backup (OrcProgram *program) { - return orc_test_compare_output_full (program, 1); + return orc_test_compare_output_full (program, ORC_TEST_FLAGS_BACKUP); } OrcTestResult -orc_test_compare_output_full (OrcProgram *program, int backup) +orc_test_compare_output_full (OrcProgram *program, int flags) { OrcExecutor *ex; int n = 64 + (orc_random(&rand_context)&0xf); @@ -304,7 +306,8 @@ orc_test_compare_output_full (OrcProgram *program, int backup) ORC_DEBUG ("got here"); - if (!backup) { +flags |= ORC_TEST_FLAGS_FLOAT; + if (!(flags & ORC_TEST_FLAGS_BACKUP)) { OrcTarget *target; unsigned int flags; @@ -384,7 +387,7 @@ orc_test_compare_output_full (OrcProgram *program, int backup) for(k=0;kvars[k].vartype == ORC_VAR_TYPE_DEST) { - if (!orc_array_compare (dest_exec[k], dest_emul[k])) { + if (!orc_array_compare (dest_exec[k], dest_emul[k], flags)) { for(j=0;jvars[l].name == NULL) continue; if (program->vars[l].vartype == ORC_VAR_TYPE_SRC && program->vars[l].size > 0) { - print_array_val_signed (src[l-ORC_VAR_S1], i, j); + if (flags & ORC_TEST_FLAGS_FLOAT) { + print_array_val_float (src[l-ORC_VAR_S1], i, j); + } else { + print_array_val_signed (src[l-ORC_VAR_S1], i, j); + } } } printf(" ->"); - a = print_array_val_signed (dest_emul[k], i, j); - b = print_array_val_signed (dest_exec[k], i, j); + if (flags & ORC_TEST_FLAGS_FLOAT) { + a = print_array_val_float (dest_emul[k], i, j); + b = print_array_val_float (dest_exec[k], i, j); + } else { + a = print_array_val_signed (dest_emul[k], i, j); + b = print_array_val_signed (dest_exec[k], i, j); + } if (a != b) { printf(" *"); @@ -433,7 +445,11 @@ orc_test_compare_output_full (OrcProgram *program, int backup) if (program->vars[k].name == NULL) continue; if (program->vars[k].vartype == ORC_VAR_TYPE_SRC && program->vars[k].size > 0) { - print_array_val_signed (src[k-ORC_VAR_S1], i, 
j); + if (flags & ORC_TEST_FLAGS_FLOAT) { + print_array_val_float (src[k-ORC_VAR_S1], i, j); + } else { + print_array_val_signed (src[k-ORC_VAR_S1], i, j); + } } } diff --git a/orc-test/orctest.h b/orc-test/orctest.h index f0c1409..7f087ab 100644 --- a/orc-test/orctest.h +++ b/orc-test/orctest.h @@ -13,6 +13,9 @@ typedef enum { ORC_TEST_OK = 2 } OrcTestResult; +#define ORC_TEST_FLAGS_BACKUP (1<<0) +#define ORC_TEST_FLAGS_FLOAT (1<<1) + void orc_test_init (void); OrcTestResult orc_test_gcc_compile (OrcProgram *p); void orc_test_random_bits (void *data, int n_bytes); diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index 32cb7fb..7d8dbd1 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -554,6 +555,88 @@ accsadubl (OrcOpcodeExecutor *ex, void *user) (int)((uint8_t)ex->src_values[1])); } +/* float ops */ + +static float +ORC_FLOAT_READ(void *addr) +{ + union { + float f; + unsigned int i; + } x; + x.i = *(unsigned int *)(addr); + return x.f; +} + +static void +ORC_FLOAT_WRITE(void *addr, float value) +{ + union { + float f; + unsigned int i; + } x; + x.f = value; + *(unsigned int *)(addr) = x.i; +} + +#if 0 +/* Oh noes! Aliasing rules! 
*/ +#define ORC_FLOAT_READ(addr) (*(float *)(addr)) +#define ORC_FLOAT_WRITE(addr,value) do{ (*(float *)(addr)) = (value); }while(0 +#endif + +#define UNARY_F(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + float a = ORC_FLOAT_READ(&ex->src_values[0]); \ + ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_F(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + void *pa = &ex->src_values[0]; \ + void *pb = &ex->src_values[1]; \ + float a = ORC_FLOAT_READ(pa); \ + float b = ORC_FLOAT_READ(pb); \ + ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_FL(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + float a = ORC_FLOAT_READ(&ex->src_values[0]); \ + float b = ORC_FLOAT_READ(&ex->src_values[1]); \ + ex->dest_values[0] = code ; \ +} + +BINARY_F(addf, a + b) +BINARY_F(subf, a - b) +BINARY_F(mulf, a * b) +BINARY_F(divf, a / b) +UNARY_F(orc_sqrtf, sqrt(a) ) +BINARY_F(maxf, (a>b) ? 
a : b) +BINARY_F(minf, (adest_values[0] = rintf(ORC_FLOAT_READ(&ex->src_values[0])); +} + +static void +convlf (OrcOpcodeExecutor *ex, void *user) +{ + ORC_FLOAT_WRITE(&ex->dest_values[0], ex->src_values[0]); +} + static OrcStaticOpcode opcodes[] = { @@ -694,6 +777,20 @@ static OrcStaticOpcode opcodes[] = { { "mergewl", mergewl, NULL, 0, { 4 }, { 2, 2 } }, { "mergebw", mergebw, NULL, 0, { 2 }, { 1, 1 } }, + /* float ops */ + { "addf", addf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "subf", subf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "mulf", mulf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "divf", divf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "sqrtf", orc_sqrtf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4 } }, + { "maxf", maxf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "minf", minf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } }, + { "cmpeqf", cmpeqf, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } }, + { "cmpltf", cmpltf, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } }, + { "cmplef", cmplef, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } }, + { "convfl", convfl, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4 } }, + { "convlf", convlf, NULL, ORC_STATIC_OPCODE_FLOAT_DEST, { 4 }, { 4 } }, + { "" } }; diff --git a/orc/orcprogram.h b/orc/orcprogram.h index 1f50fcb..feee65a 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -233,7 +233,11 @@ struct _OrcOpcodeSet { OrcStaticOpcode *opcodes; }; -#define ORC_STATIC_OPCODE_ACCUMULATOR 1 +#define ORC_STATIC_OPCODE_ACCUMULATOR (1<<0) +#define ORC_STATIC_OPCODE_FLOAT_SRC (1<<1) +#define ORC_STATIC_OPCODE_FLOAT_DEST (1<<2) +#define ORC_STATIC_OPCODE_FLOAT (ORC_STATIC_OPCODE_FLOAT_SRC|ORC_STATIC_OPCODE_FLOAT_DEST) + struct _OrcStaticOpcode { char name[16]; diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 1857272..c7b0cd7 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -863,6 +863,77 @@ sse_rule_minul_slow (OrcCompiler *p, void 
*user, OrcInstruction *insn) orc_sse_emit_pxor(p, tmp, dest); } +/* float ops */ + +#define UNARY_F(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + orc_sse_emit_0f (p, insn_name, code, \ + p->vars[insn->src_args[0]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +#define BINARY_F(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + orc_sse_emit_0f (p, insn_name, code, \ + p->vars[insn->src_args[1]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +BINARY_F(addf, "addps", 0x58) +BINARY_F(subf, "subps", 0x5c) +BINARY_F(mulf, "mulps", 0x59) +BINARY_F(divf, "divps", 0x5e) +BINARY_F(maxf, "maxps", 0x5f) +BINARY_F(minf, "minps", 0x5d) +UNARY_F(sqrtf, "sqrtps", 0x51) + +static void +sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_sse_emit_0f (p, "cmpeqps", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x00; +} + +static void +sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_sse_emit_0f (p, "cmpltps", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x01; +} + +static void +sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_sse_emit_0f (p, "cmpleps", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x02; +} + +static void +sse_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_sse_emit_660f (p, "cvtps2dq", 0x5b, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + orc_sse_emit_0f (p, "cvtdq2ps", 0x5b, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + void orc_compiler_sse_register_rules (OrcTarget *target) @@ -956,6 +1027,19 @@ 
orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "accl", sse_rule_accl, NULL); orc_rule_register (rule_set, "accsadubl", sse_rule_accsadubl, NULL); + orc_rule_register (rule_set, "addf", sse_rule_addf, NULL); + orc_rule_register (rule_set, "subf", sse_rule_subf, NULL); + orc_rule_register (rule_set, "mulf", sse_rule_mulf, NULL); + orc_rule_register (rule_set, "divf", sse_rule_divf, NULL); + orc_rule_register (rule_set, "minf", sse_rule_minf, NULL); + orc_rule_register (rule_set, "maxf", sse_rule_maxf, NULL); + orc_rule_register (rule_set, "sqrtf", sse_rule_sqrtf, NULL); + orc_rule_register (rule_set, "cmpeqf", sse_rule_cmpeqf, NULL); + orc_rule_register (rule_set, "cmpltf", sse_rule_cmpltf, NULL); + orc_rule_register (rule_set, "cmplef", sse_rule_cmplef, NULL); + orc_rule_register (rule_set, "convfl", sse_rule_convfl, NULL); + orc_rule_register (rule_set, "convlf", sse_rule_convlf, NULL); + /* slow rules */ orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL); orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL);