#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <math.h>
#define EXTEND_ROWS 16
#define EXTEND_STRIDE 256
int
-orc_array_compare (OrcArray *array1, OrcArray *array2)
+orc_array_compare (OrcArray *array1, OrcArray *array2, int flags)
{
- if (memcmp (array1->alloc_data, array2->alloc_data,
- array1->alloc_len) == 0) {
+ if (flags & ORC_TEST_FLAGS_FLOAT && array1->element_size == 4) {
+ int j;
+ for(j=0;j<array1->m;j++){
+ float *a, *b;
+ int i;
+
+ a = ORC_PTR_OFFSET (array1->data, j*array1->stride);
+ b = ORC_PTR_OFFSET (array2->data, j*array2->stride);
+
+ for (i=0;i<array1->n;i++){
+ if (!((isnan(a[i]) && isnan(b[i])) || a[i] == b[i])) {
+ return FALSE;
+ }
+ }
+ }
return TRUE;
+ } else {
+ if (memcmp (array1->alloc_data, array2->alloc_data,
+ array1->alloc_len) == 0) {
+ return TRUE;
+ }
}
return FALSE;
void orc_array_set_pattern (OrcArray *array, int value);
void orc_array_set_random (OrcArray *array, OrcRandom *context);
-int orc_array_compare (OrcArray *array1, OrcArray *array2);
+int orc_array_compare (OrcArray *array1, OrcArray *array2, int flags);
int orc_array_check_out_of_bounds (OrcArray *array);
#endif
}
}
+/*
+ * Fill data[0] .. data[n-1] with pseudo-random floats.
+ *
+ * For each element the generator state context->x is advanced by one
+ * linear-congruential step (x = 1103515245*x + 12345).  The new state,
+ * shifted right by 16 bits, is divided by 32768 and offset by -1.
+ * NOTE(review): that gives values in [-1, 1) only if x is a 32-bit
+ * unsigned integer -- confirm against the OrcRandom definition.
+ */
+void
+orc_random_floats (OrcRandom *context, float *data, int n)
+{
+  int idx = 0;
+
+  while (idx < n) {
+    context->x = 1103515245*context->x + 12345;
+    data[idx] = (double)(context->x>>16) / 32768.0 - 1.0;
+    idx++;
+  }
+}
+
unsigned int
orc_random (OrcRandom *context)
{
void orc_random_init (OrcRandom *context, int seed);
void orc_random_bits (OrcRandom *context, void *data, int n_bytes);
+void orc_random_floats (OrcRandom *context, float *data, int n);
unsigned int orc_random (OrcRandom *context);
ORC_END_DECLS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <math.h>
OrcRandom rand_context;
}
}
-float
-print_array_val_float (void *array, int size, int i)
+int
+print_array_val_float (OrcArray *array, int i, int j)
{
- switch (size) {
+ void *ptr = ORC_PTR_OFFSET (array->data,
+ i*array->element_size + j*array->stride);
+
+ switch (array->element_size) {
case 4:
- {
- float *a = array;
- printf(" %g", a[i]);
- return a[i];
+ if (isnan(*(float *)ptr)) {
+ printf(" nan %08x", *(uint32_t *)ptr);
+ /* This is to get around signaling/non-signaling nans in the output */
+ return (*(uint32_t *)ptr) & 0xffbfffff;
+ } else {
+ printf(" %12.5g", *(float *)ptr);
+ return *(int32_t *)ptr;
}
- break;
case 8:
- {
- double *a = array;
- printf(" %g", a[i]);
- return a[i];
- }
- break;
+ printf(" %12.5g", *(double *)ptr);
+ return *(int64_t *)ptr;
default:
printf(" ERROR");
return -1;
}
static OrcTestResult orc_test_compare_output_full (OrcProgram *program,
- int backup);
+ int flags);
OrcTestResult
orc_test_compare_output (OrcProgram *program)
OrcTestResult
orc_test_compare_output_backup (OrcProgram *program)
{
- return orc_test_compare_output_full (program, 1);
+ return orc_test_compare_output_full (program, ORC_TEST_FLAGS_BACKUP);
}
OrcTestResult
-orc_test_compare_output_full (OrcProgram *program, int backup)
+orc_test_compare_output_full (OrcProgram *program, int flags)
{
OrcExecutor *ex;
int n = 64 + (orc_random(&rand_context)&0xf);
ORC_DEBUG ("got here");
- if (!backup) {
+flags |= ORC_TEST_FLAGS_FLOAT;
+ if (!(flags & ORC_TEST_FLAGS_BACKUP)) {
OrcTarget *target;
unsigned int flags;
for(k=0;k<ORC_N_VARIABLES;k++){
if (program->vars[k].vartype == ORC_VAR_TYPE_DEST) {
- if (!orc_array_compare (dest_exec[k], dest_emul[k])) {
+ if (!orc_array_compare (dest_exec[k], dest_emul[k], flags)) {
for(j=0;j<m;j++){
for(i=0;i<n;i++){
int a,b;
if (program->vars[l].name == NULL) continue;
if (program->vars[l].vartype == ORC_VAR_TYPE_SRC &&
program->vars[l].size > 0) {
- print_array_val_signed (src[l-ORC_VAR_S1], i, j);
+ if (flags & ORC_TEST_FLAGS_FLOAT) {
+ print_array_val_float (src[l-ORC_VAR_S1], i, j);
+ } else {
+ print_array_val_signed (src[l-ORC_VAR_S1], i, j);
+ }
}
}
printf(" ->");
- a = print_array_val_signed (dest_emul[k], i, j);
- b = print_array_val_signed (dest_exec[k], i, j);
+ if (flags & ORC_TEST_FLAGS_FLOAT) {
+ a = print_array_val_float (dest_emul[k], i, j);
+ b = print_array_val_float (dest_exec[k], i, j);
+ } else {
+ a = print_array_val_signed (dest_emul[k], i, j);
+ b = print_array_val_signed (dest_exec[k], i, j);
+ }
if (a != b) {
printf(" *");
if (program->vars[k].name == NULL) continue;
if (program->vars[k].vartype == ORC_VAR_TYPE_SRC &&
program->vars[k].size > 0) {
- print_array_val_signed (src[k-ORC_VAR_S1], i, j);
+ if (flags & ORC_TEST_FLAGS_FLOAT) {
+ print_array_val_float (src[k-ORC_VAR_S1], i, j);
+ } else {
+ print_array_val_signed (src[k-ORC_VAR_S1], i, j);
+ }
}
}
ORC_TEST_OK = 2
} OrcTestResult;
+#define ORC_TEST_FLAGS_BACKUP (1<<0) /* passed by orc_test_compare_output_backup(); orc_test_compare_output_full() enters its OrcTarget branch only when this bit is clear */
+#define ORC_TEST_FLAGS_FLOAT (1<<1) /* orc_array_compare(): compare 4-byte elements as floats, NaN matching NaN; mismatch dumps print values as floats */
+
void orc_test_init (void);
OrcTestResult orc_test_gcc_compile (OrcProgram *p);
void orc_test_random_bits (void *data, int n_bytes);
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <math.h>
#include <orc/orcprogram.h>
#include <orc/orcdebug.h>
(int)((uint8_t)ex->src_values[1]));
}
+/* float ops */
+
+/*
+ * Load the unsigned int stored at addr and return its bit pattern
+ * reinterpreted as a float.  The reinterpretation goes through a union
+ * instead of a float-pointer cast.
+ */
+static float
+ORC_FLOAT_READ(void *addr)
+{
+  union {
+    unsigned int bits;
+    float value;
+  } pun;
+
+  pun.bits = *(unsigned int *)addr;
+  return pun.value;
+}
+
+/*
+ * Store the bit pattern of the float `value' into the unsigned int at
+ * addr.  The reinterpretation goes through a union instead of a
+ * float-pointer cast.
+ */
+static void
+ORC_FLOAT_WRITE(void *addr, float value)
+{
+  union {
+    unsigned int bits;
+    float value;
+  } pun;
+
+  pun.value = value;
+  *(unsigned int *)addr = pun.bits;
+}
+
+#if 0
+/* Oh noes! Aliasing rules! */
+/*
+ * Disabled direct-cast variants of ORC_FLOAT_READ/ORC_FLOAT_WRITE.  They
+ * access the storage through a float pointer, which is why they are
+ * compiled out.  The do/while(0) wrapper is closed properly so the block
+ * still compiles if it is ever re-enabled.
+ */
+#define ORC_FLOAT_READ(addr) (*(float *)(addr))
+#define ORC_FLOAT_WRITE(addr,value) do{ (*(float *)(addr)) = (value); }while(0)
+#endif
+
+/*
+ * UNARY_F(name, code): define the emulation function `name' for a
+ * one-operand float opcode.  src_values[0] is read as the float `a';
+ * `code' is an expression in `a' whose value is stored as a float in
+ * dest_values[0].
+ */
+#define UNARY_F(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+  void *pa = &ex->src_values[0]; \
+  float a = ORC_FLOAT_READ(pa); \
+  ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \
+}
+
+/*
+ * BINARY_F(name, code): define the emulation function `name' for a
+ * two-operand float opcode.  src_values[0] and src_values[1] are read as
+ * the floats `a' and `b'; `code' is an expression in `a' and `b' whose
+ * value is stored as a float in dest_values[0].
+ */
+#define BINARY_F(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+  float a = ORC_FLOAT_READ(&ex->src_values[0]); \
+  float b = ORC_FLOAT_READ(&ex->src_values[1]); \
+  ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \
+}
+
+/*
+ * BINARY_FL(name, code): like BINARY_F, but the value of `code' is
+ * assigned directly to dest_values[0] instead of being written as a float
+ * bit pattern.  Used for opcodes with float sources and a non-float
+ * result.
+ */
+#define BINARY_FL(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+  void *pa = &ex->src_values[0]; \
+  void *pb = &ex->src_values[1]; \
+  float a = ORC_FLOAT_READ(pa); \
+  float b = ORC_FLOAT_READ(pb); \
+  ex->dest_values[0] = code ; \
+}
+
+BINARY_F(addf, a + b) /* each BINARY_F line defines a static emulation function storing a float result */
+BINARY_F(subf, a - b)
+BINARY_F(mulf, a * b)
+BINARY_F(divf, a / b)
+UNARY_F(orc_sqrtf, sqrt(a) ) /* sqrt() works in double; the result is narrowed to float when stored */
+BINARY_F(maxf, (a>b) ? a : b) /* yields b whenever a>b is false, which includes either operand being NaN */
+BINARY_F(minf, (a<b) ? a : b) /* yields b whenever a<b is false, which includes either operand being NaN */
+
+BINARY_FL(cmpeqf, (a == b) ? (~0) : 0) /* comparisons store an integer mask: ~0 when true, 0 when false */
+BINARY_FL(cmpltf, (a < b) ? (~0) : 0)
+BINARY_FL(cmplef, (a <= b) ? (~0) : 0)
+
+/*
+ * convfl: convert the float in src_values[0] to an integer, rounding with
+ * rintf() (current rounding mode), and store it in dest_values[0].
+ *
+ * Converting a NaN or a value outside the range of the destination integer
+ * is undefined behaviour in C.  The opcode table declares a 4-byte
+ * destination, so such inputs are mapped explicitly to 0x80000000, the
+ * "integer indefinite" value that the cvtps2dq instruction produces for
+ * them.  In-range inputs are converted exactly as before.
+ * NOTE(review): assumes int is 32 bits wide.
+ */
+static void
+convfl (OrcOpcodeExecutor *ex, void *user)
+{
+  float rounded = rintf(ORC_FLOAT_READ(&ex->src_values[0]));
+
+  if (isnan(rounded) || rounded >= 2147483648.0f ||
+      rounded < -2147483648.0f) {
+    /* -2147483647 - 1 spells INT_MIN without needing <limits.h> */
+    ex->dest_values[0] = -2147483647 - 1;
+  } else {
+    ex->dest_values[0] = rounded;
+  }
+}
+
+/*
+ * convlf: convert the integer in src_values[0] to float and store its
+ * bit pattern in dest_values[0].
+ */
+static void
+convlf (OrcOpcodeExecutor *ex, void *user)
+{
+  float converted = ex->src_values[0];
+
+  ORC_FLOAT_WRITE(&ex->dest_values[0], converted);
+}
+
static OrcStaticOpcode opcodes[] = {
{ "mergewl", mergewl, NULL, 0, { 4 }, { 2, 2 } },
{ "mergebw", mergebw, NULL, 0, { 2 }, { 1, 1 } },
+ /* float ops */
+ { "addf", addf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "subf", subf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "mulf", mulf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "divf", divf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "sqrtf", orc_sqrtf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4 } },
+ { "maxf", maxf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "minf", minf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4, 4 } },
+ { "cmpeqf", cmpeqf, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } },
+ { "cmpltf", cmpltf, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } },
+ { "cmplef", cmplef, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4, 4 } },
+ { "convfl", convfl, NULL, ORC_STATIC_OPCODE_FLOAT_SRC, { 4 }, { 4 } },
+ { "convlf", convlf, NULL, ORC_STATIC_OPCODE_FLOAT_DEST, { 4 }, { 4 } },
+
{ "" }
};
OrcStaticOpcode *opcodes;
};
-#define ORC_STATIC_OPCODE_ACCUMULATOR 1
+#define ORC_STATIC_OPCODE_ACCUMULATOR (1<<0) /* same value as the previous plain 1, now written as a bit */
+#define ORC_STATIC_OPCODE_FLOAT_SRC (1<<1) /* set alone on cmpeqf/cmpltf/cmplef/convfl; presumably "sources are floats" -- confirm */
+#define ORC_STATIC_OPCODE_FLOAT_DEST (1<<2) /* set alone on convlf; presumably "destination is a float" -- confirm */
+#define ORC_STATIC_OPCODE_FLOAT (ORC_STATIC_OPCODE_FLOAT_SRC|ORC_STATIC_OPCODE_FLOAT_DEST) /* both bits; used by the float arithmetic opcodes */
+
struct _OrcStaticOpcode {
char name[16];
orc_sse_emit_pxor(p, tmp, dest);
}
+/* float ops */
+
+#define UNARY_F(opcode,insn_name,code) /* defines sse_rule_<opcode>: one orc_sse_emit_0f call from the alloc of src_args[0] to the alloc of dest_args[0] */ \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ orc_sse_emit_0f (p, insn_name, code, \
+ p->vars[insn->src_args[0]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+
+#define BINARY_F(opcode,insn_name,code) /* defines sse_rule_<opcode>: emits from the alloc of src_args[1] to the alloc of dest_args[0]; src_args[0] is not read here, presumably because dest already holds it -- confirm */ \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ orc_sse_emit_0f (p, insn_name, code, \
+ p->vars[insn->src_args[1]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+
+BINARY_F(addf, "addps", 0x58) /* each line defines sse_rule_<opcode>, passing this mnemonic and opcode byte to orc_sse_emit_0f */
+BINARY_F(subf, "subps", 0x5c)
+BINARY_F(mulf, "mulps", 0x59)
+BINARY_F(divf, "divps", 0x5e)
+BINARY_F(maxf, "maxps", 0x5f)
+BINARY_F(minf, "minps", 0x5d)
+UNARY_F(sqrtf, "sqrtps", 0x51) /* unary: reads src_args[0] rather than src_args[1] */
+
+/*
+ * Rule for cmpeqf: emit "cmpeqps" (opcode byte 0xc2 through
+ * orc_sse_emit_0f) from the alloc of src_args[1] to the alloc of
+ * dest_args[0], then append the immediate byte 0x00 -- the CMPPS
+ * "equal" predicate -- to the code stream.
+ */
+static void
+sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  orc_sse_emit_0f (p, "cmpeqps", 0xc2, p->vars[insn->src_args[1]].alloc,
+      p->vars[insn->dest_args[0]].alloc);
+
+  p->codeptr[0] = 0x00;
+  p->codeptr++;
+}
+
+/*
+ * Rule for cmpltf: emit "cmpltps" (opcode byte 0xc2 through
+ * orc_sse_emit_0f) from the alloc of src_args[1] to the alloc of
+ * dest_args[0], then append the immediate byte 0x01 -- the CMPPS
+ * "less than" predicate -- to the code stream.
+ */
+static void
+sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  orc_sse_emit_0f (p, "cmpltps", 0xc2, p->vars[insn->src_args[1]].alloc,
+      p->vars[insn->dest_args[0]].alloc);
+
+  p->codeptr[0] = 0x01;
+  p->codeptr++;
+}
+
+/*
+ * Rule for cmplef: emit "cmpleps" (opcode byte 0xc2 through
+ * orc_sse_emit_0f) from the alloc of src_args[1] to the alloc of
+ * dest_args[0], then append the immediate byte 0x02 -- the CMPPS
+ * "less than or equal" predicate -- to the code stream.
+ */
+static void
+sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  orc_sse_emit_0f (p, "cmpleps", 0xc2, p->vars[insn->src_args[1]].alloc,
+      p->vars[insn->dest_args[0]].alloc);
+
+  p->codeptr[0] = 0x02;
+  p->codeptr++;
+}
+
+static void
+sse_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{ /* convfl rule: emit "cvtps2dq" (opcode byte 0x5b via orc_sse_emit_660f) from the alloc of src_args[0] to the alloc of dest_args[0] */
+ orc_sse_emit_660f (p, "cvtps2dq", 0x5b,
+ p->vars[insn->src_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+}
+
+static void
+sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{ /* convlf rule: emit "cvtdq2ps" (opcode byte 0x5b via orc_sse_emit_0f) from the alloc of src_args[0] to the alloc of dest_args[0] */
+ orc_sse_emit_0f (p, "cvtdq2ps", 0x5b,
+ p->vars[insn->src_args[0]].alloc,
+ p->vars[insn->dest_args[0]].alloc);
+}
+
void
orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "accl", sse_rule_accl, NULL);
orc_rule_register (rule_set, "accsadubl", sse_rule_accsadubl, NULL);
+ orc_rule_register (rule_set, "addf", sse_rule_addf, NULL);
+ orc_rule_register (rule_set, "subf", sse_rule_subf, NULL);
+ orc_rule_register (rule_set, "mulf", sse_rule_mulf, NULL);
+ orc_rule_register (rule_set, "divf", sse_rule_divf, NULL);
+ orc_rule_register (rule_set, "minf", sse_rule_minf, NULL);
+ orc_rule_register (rule_set, "maxf", sse_rule_maxf, NULL);
+ orc_rule_register (rule_set, "sqrtf", sse_rule_sqrtf, NULL);
+ orc_rule_register (rule_set, "cmpeqf", sse_rule_cmpeqf, NULL);
+ orc_rule_register (rule_set, "cmpltf", sse_rule_cmpltf, NULL);
+ orc_rule_register (rule_set, "cmplef", sse_rule_cmplef, NULL);
+ orc_rule_register (rule_set, "convfl", sse_rule_convfl, NULL);
+ orc_rule_register (rule_set, "convlf", sse_rule_convlf, NULL);
+
/* slow rules */
orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL);
orc_rule_register (rule_set, "minuw", sse_rule_minuw_slow, NULL);