From 712ae2a01f0e74d9eeac28546b9b26256c9a23b5 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Mon, 27 Apr 2009 18:58:28 -0700 Subject: [PATCH] Add orc-float library Also fix some core features for 8-byte values --- Makefile.am | 2 +- configure.ac | 1 + orc-float/Makefile.am | 16 +++ orc-float/README | 76 +++++++++++ orc-float/orcfloat-sse.c | 334 +++++++++++++++++++++++++++++++++++++++++++++++ orc-float/orcfloat.c | 188 ++++++++++++++++++++++++++ orc-float/orcfloat.h | 15 +++ orc-test/orctest.c | 60 ++++++++- orc/orcexecutor.c | 6 + orc/orcprogram-sse.c | 7 + orc/orcrules-sse.c | 22 ++++ orc/x86.h | 1 + 12 files changed, 724 insertions(+), 4 deletions(-) create mode 100644 orc-float/Makefile.am create mode 100644 orc-float/README create mode 100644 orc-float/orcfloat-sse.c create mode 100644 orc-float/orcfloat.c create mode 100644 orc-float/orcfloat.h diff --git a/Makefile.am b/Makefile.am index e43dde4..65eeed8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,7 +1,7 @@ AUTOMAKE_OPTIONS = foreign -SUBDIRS = orc orc-pixel orc-test testsuite examples doc tools +SUBDIRS = orc orc-float orc-pixel orc-test testsuite examples doc tools EXTRA_DIST = COPYING autogen.sh gtk-doc.make diff --git a/configure.ac b/configure.ac index cbb97f2..7ddb7bd 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ AC_CONFIG_FILES([ Makefile doc/Makefile orc/Makefile +orc-float/Makefile orc-pixel/Makefile orc-test/Makefile testsuite/Makefile diff --git a/orc-float/Makefile.am b/orc-float/Makefile.am new file mode 100644 index 0000000..bd1cc67 --- /dev/null +++ b/orc-float/Makefile.am @@ -0,0 +1,16 @@ + +pkgincludedir = $(includedir)/orc-@ORC_MAJORMINOR@/orc-float + +lib_LTLIBRARIES = liborc-float-@ORC_MAJORMINOR@.la + +liborc_float_@ORC_MAJORMINOR@_la_LIBADD = $(ORC_LIBS) +liborc_float_@ORC_MAJORMINOR@_la_LDFLAGS = \ + -no-undefined -export-symbols-regex 'orc_' +liborc_float_@ORC_MAJORMINOR@_la_CFLAGS = $(ORC_CFLAGS) + +liborc_float_@ORC_MAJORMINOR@_la_SOURCES = \ + orcfloat.c 
orcfloat-sse.c + +pkginclude_HEADERS = \ + orcfloat.h + diff --git a/orc-float/README b/orc-float/README new file mode 100644 index 0000000..359257c --- /dev/null +++ b/orc-float/README @@ -0,0 +1,76 @@ + +orc-float +========= + +See the notes about orc-pixel. + +Specifying floating point parameters requires you to use the integer +that corresponds to the bit pattern of the floating point number you +want. + +The SSE backend generates nearly correct output; however, it's not +bit-exact with the C code for some operations. Of course, it's +notoriously difficult to get bit-exact floating point code in C. + + + +diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am +index d4220db..a754193 100644 +--- a/testsuite/Makefile.am ++++ b/testsuite/Makefile.am +@@ -4,5 +4,6 @@ TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile t + orcbin_PROGRAMS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile test_accsadubl test-schro + + AM_CFLAGS = $(ORC_CFLAGS) +-LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la ++LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la \ ++ $(top_builddir)/orc-float/liborc-float-0.3.la + +diff --git a/testsuite/test_compile.c b/testsuite/test_compile.c +index f95aeb5..a3f58e2 100644 +--- a/testsuite/test_compile.c ++++ b/testsuite/test_compile.c +@@ -6,6 +6,7 @@ + + #include + #include ++#include + + + int error = FALSE; +@@ -22,8 +23,9 @@ main (int argc, char *argv[]) + + orc_init(); + orc_test_init(); ++ orc_float_init(); + +- opcode_set = orc_opcode_set_get ("sys"); ++ opcode_set = orc_opcode_set_get ("float"); + + for(i=0;in_opcodes;i++){ + printf("/* %s %d,%d,%d %p */\n", +diff --git a/testsuite/test_local_opcode_execution.c b/testsuite/test_local_opcode_execution.c +index fc62a49..17230d6 100644 +--- a/testsuite/test_local_opcode_execution.c ++++ b/testsuite/test_local_opcode_execution.c +@@ -5,6 +5,7 @@ + + #include + #include ++#include + + + int error = FALSE; +@@ 
-20,10 +21,11 @@ main (int argc, char *argv[]) + int i; + OrcOpcodeSet *opcode_set; + ++ orc_float_init(); + orc_test_init(); + orc_init(); + +- opcode_set = orc_opcode_set_get ("sys"); ++ opcode_set = orc_opcode_set_get ("float"); + + for(i=0;in_opcodes;i++){ + printf("/* %s src %d,%d,%d */\n", diff --git a/orc-float/orcfloat-sse.c b/orc-float/orcfloat-sse.c new file mode 100644 index 0000000..0a433ea --- /dev/null +++ b/orc-float/orcfloat-sse.c @@ -0,0 +1,334 @@ + +#include +#include +#include + +#include +#include +#include + +#include + +#define X86_MODRM(mod, rm, reg) ((((mod)&3)<<6)|(((rm)&7)<<0)|(((reg)&7)<<3)) + +#ifdef HAVE_AMD64 +int x86_64 = 1; +int x86_ptr_size = 8; +int x86_exec_ptr = X86_EDI; +#else +int x86_64 = 0; +int x86_ptr_size = 4; +int x86_exec_ptr = X86_EBP; +#endif + +const char * +x86_get_regname_sse(int i) +{ + static const char *x86_regs[] = { + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" + }; + + if (i>=X86_XMM0 && i= 8) rex |= 0x08; + if (reg1 == 1 || (x86_get_regnum(reg1)>=8)) rex |= 0x4; + if (reg2 == 1 || (x86_get_regnum(reg2)>=8)) rex |= 0x2; + if (reg3 == 1 || (x86_get_regnum(reg3)>=8)) rex |= 0x1; + + if (rex != 0x40) *compiler->codeptr++ = rex; + } +} + +void +x86_emit_modrm_reg (OrcCompiler *compiler, int reg1, int reg2) +{ + *compiler->codeptr++ = X86_MODRM(3, reg1, reg2); +} + +void +sse_emit_f20f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest) +{ + ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(src), + x86_get_regname_sse(dest)); + *p->codeptr++ = 0xf2; + x86_emit_rex (p, 0, src, 0, dest); + *p->codeptr++ = 0x0f; + *p->codeptr++ = code; + x86_emit_modrm_reg (p, src, dest); +} + +void +sse_emit_f30f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest) +{ + ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(src), + x86_get_regname_sse(dest)); + *p->codeptr++ = 
0xf3; + x86_emit_rex (p, 0, src, 0, dest); + *p->codeptr++ = 0x0f; + *p->codeptr++ = code; + x86_emit_modrm_reg (p, src, dest); +} + +void +sse_emit_660f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest) +{ + ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(src), + x86_get_regname_sse(dest)); + *p->codeptr++ = 0x66; + x86_emit_rex (p, 0, src, 0, dest); + *p->codeptr++ = 0x0f; + *p->codeptr++ = code; + x86_emit_modrm_reg (p, src, dest); +} + +void +sse_emit_0f (OrcCompiler *p, const char *insn_name, int code, + int src, int dest) +{ + ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(src), + x86_get_regname_sse(dest)); + x86_emit_rex (p, 0, src, 0, dest); + *p->codeptr++ = 0x0f; + *p->codeptr++ = code; + x86_emit_modrm_reg (p, src, dest); +} + + +#define UNARY(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + sse_emit_0f (p, insn_name, code, \ + p->vars[insn->src_args[0]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +#define BINARY(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + sse_emit_0f (p, insn_name, code, \ + p->vars[insn->src_args[1]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + + +BINARY(addf, "addps", 0x58) +BINARY(subf, "subps", 0x5c) +BINARY(mulf, "mulps", 0x59) +BINARY(divf, "divps", 0x5e) +BINARY(maxf, "maxps", 0x5f) +BINARY(minf, "minps", 0x5d) +UNARY(invf, "rcpps", 0x53) +UNARY(sqrtf, "sqrtps", 0x51) +UNARY(invsqrtf, "rsqrtps", 0x52) + +static void +sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_0f (p, "cmpeqps", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x00; +} + +static void +sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_0f (p, "cmpltps", 0xc2, + p->vars[insn->src_args[1]].alloc, + 
p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x01; +} + +static void +sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_0f (p, "cmpleps", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x02; +} + +static void +sse_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_660f (p, "cvtps2dq", 0x5b, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_0f (p, "cvtdq2ps", 0x5b, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +#define UNARY_66(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + sse_emit_660f (p, insn_name, code, \ + p->vars[insn->src_args[0]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +#define BINARY_66(opcode,insn_name,code) \ +static void \ +sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ +{ \ + sse_emit_660f (p, insn_name, code, \ + p->vars[insn->src_args[1]].alloc, \ + p->vars[insn->dest_args[0]].alloc); \ +} + +BINARY_66(addg, "addpd", 0x58) +BINARY_66(subg, "subpd", 0x5c) +BINARY_66(mulg, "mulpd", 0x59) +BINARY_66(divg, "divpd", 0x5e) +BINARY_66(maxg, "maxpd", 0x5f) +BINARY_66(ming, "minpd", 0x5d) +#if 0 +/* These don't actually exist */ +UNARY_66(invg, "rcppd", 0x53) +UNARY_66(sqrtg, "sqrtpd", 0x51) +UNARY_66(invsqrtg, "rsqrtpd", 0x52) +#endif + +static void +sse_rule_cmpeqg (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_660f (p, "cmpeqpd", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x00; +} + +static void +sse_rule_cmpltg (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_660f (p, "cmpltpd", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x01; +} + 
+static void +sse_rule_cmpleg (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_660f (p, "cmplepd", 0xc2, + p->vars[insn->src_args[1]].alloc, + p->vars[insn->dest_args[0]].alloc); + *p->codeptr++ = 0x02; +} + +static void +sse_rule_convgl (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_f20f (p, "cvtpd2dq", 0xe6, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +sse_rule_convlg (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_f30f (p, "cvtdq2pd", 0xe6, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +sse_rule_convgf (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_660f (p, "cvtpd2ps", 0x5a, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + +static void +sse_rule_convfg (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + sse_emit_0f (p, "cvtps2pd", 0x5a, + p->vars[insn->src_args[0]].alloc, + p->vars[insn->dest_args[0]].alloc); +} + + +void +orc_float_sse_register_rules (OrcTarget *target) +{ + OrcRuleSet *rule_set; + + rule_set = orc_rule_set_new (orc_opcode_set_get("float"), + orc_target_get_by_name ("sse")); + + orc_rule_register (rule_set, "addf", sse_rule_addf, NULL); + orc_rule_register (rule_set, "subf", sse_rule_subf, NULL); + orc_rule_register (rule_set, "mulf", sse_rule_mulf, NULL); + orc_rule_register (rule_set, "divf", sse_rule_divf, NULL); + orc_rule_register (rule_set, "minf", sse_rule_minf, NULL); + orc_rule_register (rule_set, "maxf", sse_rule_maxf, NULL); + orc_rule_register (rule_set, "invf", sse_rule_invf, NULL); + orc_rule_register (rule_set, "sqrtf", sse_rule_sqrtf, NULL); + orc_rule_register (rule_set, "invsqrtf", sse_rule_invsqrtf, NULL); + orc_rule_register (rule_set, "cmpeqf", sse_rule_cmpeqf, NULL); + orc_rule_register (rule_set, "cmpltf", sse_rule_cmpltf, NULL); + orc_rule_register (rule_set, "cmplef", sse_rule_cmplef, NULL); + orc_rule_register (rule_set, 
"convfl", sse_rule_convfl, NULL); + orc_rule_register (rule_set, "convlf", sse_rule_convlf, NULL); + + orc_rule_register (rule_set, "addg", sse_rule_addg, NULL); + orc_rule_register (rule_set, "subg", sse_rule_subg, NULL); + orc_rule_register (rule_set, "mulg", sse_rule_mulg, NULL); + orc_rule_register (rule_set, "divg", sse_rule_divg, NULL); + orc_rule_register (rule_set, "ming", sse_rule_ming, NULL); + orc_rule_register (rule_set, "maxg", sse_rule_maxg, NULL); +#if 0 +/* These don't actually exist */ + orc_rule_register (rule_set, "invg", sse_rule_invg, NULL); + orc_rule_register (rule_set, "sqrtg", sse_rule_sqrtg, NULL); + orc_rule_register (rule_set, "invsqrtg", sse_rule_invsqrtg, NULL); +#endif + orc_rule_register (rule_set, "cmpeqg", sse_rule_cmpeqg, NULL); + orc_rule_register (rule_set, "cmpltg", sse_rule_cmpltg, NULL); + orc_rule_register (rule_set, "cmpleg", sse_rule_cmpleg, NULL); + orc_rule_register (rule_set, "convgl", sse_rule_convgl, NULL); + orc_rule_register (rule_set, "convlg", sse_rule_convlg, NULL); + + orc_rule_register (rule_set, "convgf", sse_rule_convgf, NULL); + orc_rule_register (rule_set, "convfg", sse_rule_convfg, NULL); +} + + diff --git a/orc-float/orcfloat.c b/orc-float/orcfloat.c new file mode 100644 index 0000000..3b66c6b --- /dev/null +++ b/orc-float/orcfloat.c @@ -0,0 +1,188 @@ + +#include +#include +#include + +#include +#include +#include +#include + +static OrcStaticOpcode opcodes[]; + +void orc_float_sse_register_rules (void); + +void +orc_float_init (void) +{ + orc_init (); + + orc_opcode_register_static (opcodes, "float"); + orc_float_sse_register_rules (); +} + +#define ORC_FLOAT_READ(addr) (*(float *)(addr)) +#define ORC_FLOAT_WRITE(addr,value) do{ (*(float *)(addr)) = (value); }while(0) + +#define UNARY_F(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + float a = ORC_FLOAT_READ(&ex->src_values[0]); \ + ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_F(name,code) \ +static 
void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + float a = ORC_FLOAT_READ(&ex->src_values[0]); \ + float b = ORC_FLOAT_READ(&ex->src_values[1]); \ + ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_FL(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + float a = ORC_FLOAT_READ(&ex->src_values[0]); \ + float b = ORC_FLOAT_READ(&ex->src_values[1]); \ + ex->dest_values[0] = code ; \ +} + +BINARY_F(addf, a + b) +BINARY_F(subf, a - b) +BINARY_F(mulf, a * b) +BINARY_F(divf, a / b) +UNARY_F(invf, (1.0f/a) ) +UNARY_F(orc_sqrtf, sqrtf(a) ) +BINARY_F(maxf, (a>b) ? a : b) +BINARY_F(minf, (adest_values[0] = ORC_FLOAT_READ(&ex->src_values[0]); +} + +static void +convlf (OrcOpcodeExecutor *ex, void *user) +{ + ORC_FLOAT_WRITE(&ex->dest_values[0], ex->src_values[0]); +} + +#define ORC_DOUBLE_READ(addr) (*(double *)(addr)) +#define ORC_DOUBLE_WRITE(addr,value) do{ (*(double *)(addr)) = (value); }while(0) + +#define UNARY_G(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + double a = ORC_DOUBLE_READ(&ex->src_values[0]); \ + ORC_DOUBLE_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_G(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + double a = ORC_DOUBLE_READ(&ex->src_values[0]); \ + double b = ORC_DOUBLE_READ(&ex->src_values[1]); \ + ORC_DOUBLE_WRITE(&ex->dest_values[0], code ); \ +} + +#define BINARY_GQ(name,code) \ +static void \ +name (OrcOpcodeExecutor *ex, void *user) \ +{ \ + double a = ORC_DOUBLE_READ(&ex->src_values[0]); \ + double b = ORC_DOUBLE_READ(&ex->src_values[1]); \ + ex->dest_values[0] = code ; \ +} + +BINARY_G(addg, a + b) +BINARY_G(subg, a - b) +BINARY_G(mulg, a * b) +BINARY_G(divg, a / b) +UNARY_G(invg, (1.0f/a) ) +UNARY_G(sqrtg, sqrt(a) ) +BINARY_G(maxg, (a>b) ? 
a : b) +BINARY_G(ming, (adest_values[0] = ORC_DOUBLE_READ(&ex->src_values[0]); +} + +static void +convlg (OrcOpcodeExecutor *ex, void *user) +{ + ORC_DOUBLE_WRITE(&ex->dest_values[0], ex->src_values[0]); +} + +static void +convgf (OrcOpcodeExecutor *ex, void *user) +{ + ORC_FLOAT_WRITE(&ex->dest_values[0], ORC_DOUBLE_READ(&ex->src_values[0])); +} + +static void +convfg (OrcOpcodeExecutor *ex, void *user) +{ + ORC_DOUBLE_WRITE(&ex->dest_values[0], ORC_FLOAT_READ(&ex->src_values[0])); +} + + + +static OrcStaticOpcode opcodes[] = { + { "addf", addf, NULL, 0, { 4 }, { 4, 4 } }, + { "subf", subf, NULL, 0, { 4 }, { 4, 4 } }, + { "mulf", mulf, NULL, 0, { 4 }, { 4, 4 } }, + { "divf", divf, NULL, 0, { 4 }, { 4, 4 } }, + { "invf", invf, NULL, 0, { 4 }, { 4 } }, + { "sqrtf", orc_sqrtf, NULL, 0, { 4 }, { 4 } }, + { "maxf", maxf, NULL, 0, { 4 }, { 4, 4 } }, + { "minf", minf, NULL, 0, { 4 }, { 4, 4 } }, + { "invsqrtf", invsqrtf, NULL, 0, { 4 }, { 4 } }, + + { "cmpeqf", cmpeqf, NULL, 0, { 4 }, { 4, 4 } }, + { "cmpltf", cmpltf, NULL, 0, { 4 }, { 4, 4 } }, + { "cmplef", cmplef, NULL, 0, { 4 }, { 4, 4 } }, + + { "convfl", convfl, NULL, 0, { 4 }, { 4 } }, + { "convlf", convlf, NULL, 0, { 4 }, { 4 } }, + + { "addg", addg, NULL, 0, { 8 }, { 8, 8 } }, + { "subg", subg, NULL, 0, { 8 }, { 8, 8 } }, + { "mulg", mulg, NULL, 0, { 8 }, { 8, 8 } }, + { "divg", divg, NULL, 0, { 8 }, { 8, 8 } }, + { "invg", invg, NULL, 0, { 8 }, { 8 } }, + { "sqrtg", sqrtg, NULL, 0, { 8 }, { 8 } }, + { "maxg", maxg, NULL, 0, { 8 }, { 8, 8 } }, + { "ming", ming, NULL, 0, { 8 }, { 8, 8 } }, + { "invsqrtg", invsqrtg, NULL, 0, { 8 }, { 8 } }, + + { "cmpeqg", cmpeqg, NULL, 0, { 8 }, { 8, 8 } }, + { "cmpltg", cmpltg, NULL, 0, { 8 }, { 8, 8 } }, + { "cmpleg", cmpleg, NULL, 0, { 8 }, { 8, 8 } }, + + { "convgl", convgl, NULL, 0, { 4 }, { 8 } }, + { "convlg", convlg, NULL, 0, { 8 }, { 4 } }, + + { "convgf", convgf, NULL, 0, { 4 }, { 8 } }, + { "convfg", convfg, NULL, 0, { 8 }, { 4 } }, + + { "" } +}; + diff --git 
a/orc-float/orcfloat.h b/orc-float/orcfloat.h new file mode 100644 index 0000000..8db126f --- /dev/null +++ b/orc-float/orcfloat.h @@ -0,0 +1,15 @@ + +#ifndef _ORC_FLOAT_FLOAT_H_ +#define _ORC_FLOAT_FLOAT_H_ + +#include +#include + +ORC_BEGIN_DECLS + +void orc_float_init (void); + +ORC_END_DECLS + +#endif + diff --git a/orc-test/orctest.c b/orc-test/orctest.c index fb1028e..88e4564 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -82,11 +82,20 @@ orc_test_gcc_compile (OrcProgram *p) void orc_test_random_bits (void *data, int n_bytes) { +#if 1 uint8_t *d = data; int i; for(i=0;ivars[insn->src_args[k]].size); } @@ -172,6 +175,9 @@ orc_executor_emulate (OrcExecutor *ex) case 4: *(int32_t *)ptr = opcode_ex.dest_values[k]; break; + case 8: + *(int64_t *)ptr = opcode_ex.dest_values[k]; + break; default: ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size); } diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 01ae501..744f500 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -103,6 +103,9 @@ orc_compiler_sse_init (OrcCompiler *compiler) case 4: compiler->loop_shift = 2; break; + case 8: + compiler->loop_shift = 1; + break; default: ORC_ERROR("unhandled max var size %d", orc_program_get_max_var_size (compiler->program)); @@ -228,6 +231,8 @@ sse_load_constants (OrcCompiler *compiler) sse_emit_loadpw (compiler, compiler->vars[i].alloc, i); } else if (compiler->vars[i].size == 4) { sse_emit_loadpl (compiler, compiler->vars[i].alloc, i); + } else if (compiler->vars[i].size == 8) { + sse_emit_loadpq (compiler, compiler->vars[i].alloc, i); } else { ORC_PROGRAM_ERROR(compiler, "unimplemented"); } @@ -370,6 +375,8 @@ get_shift (int size) return 1; case 4: return 2; + case 8: + return 3; default: ORC_ERROR("bad size %d", size); } diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index e1b4d75..0f15980 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -191,6 +191,28 @@ sse_emit_loadpl (OrcCompiler *p, int reg, int 
param) *p->codeptr++ = 0x00; } +void +sse_emit_loadpq (OrcCompiler *p, int reg, int param) +{ /* Emit code that loads the 64-bit value at params[param] and copies it into both quadword lanes of reg. */ + ORC_ASM_CODE(p," movq %d(%%%s), %%%s\n", + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), + x86_get_regname_ptr(x86_exec_ptr), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0xf3; /* f3 0f 7e: movq m64 -> xmm */ + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x7e; + x86_emit_modrm_memoffset (p, reg, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), x86_exec_ptr); + + ORC_ASM_CODE(p," pshufd $0x44, %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; /* 66 0f 70 /r ib: pshufd */ + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, reg, reg); + *p->codeptr++ = 0x44; /* imm8 0x44 picks dwords 0,1,0,1: the whole low quadword is duplicated (imm8 0 would replicate only its low 32 bits) */ +} + static void sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn) { diff --git a/orc/x86.h b/orc/x86.h index 2090911..5b3df04 100644 --- a/orc/x86.h +++ b/orc/x86.h @@ -68,6 +68,7 @@ void sse_emit_loadil (OrcCompiler *p, int reg, int value); void sse_emit_loadpb (OrcCompiler *p, int reg, int value); void sse_emit_loadpw (OrcCompiler *p, int reg, int value); void sse_emit_loadpl (OrcCompiler *p, int reg, int value); +void sse_emit_loadpq (OrcCompiler *p, int reg, int value); void sse_emit_660f (OrcCompiler *p, const char *insn_name, int code, int src, int dest); -- 2.7.4