From d919f8a2075679761c086931fc73e3b82965d3af Mon Sep 17 00:00:00 2001 From: =?utf8?q?Sebastian=20Dr=C3=B6ge?= Date: Thu, 26 Aug 2010 16:40:22 +0200 Subject: [PATCH] Add swapq opcode --- orc/opcodes.h | 1 + orc/orcemulateopcodes.c | 24 ++++++++++++++++++++++++ orc/orcemulateopcodes.h | 1 + orc/orcfunctions.c | 1 + orc/orcopcodes.c | 1 + orc/orcprogram-c.c | 1 + orc/orcrules-sse.c | 22 ++++++++++++++++++++++ 7 files changed, 51 insertions(+) diff --git a/orc/opcodes.h b/orc/opcodes.h index 80cef73..d9c28c7 100644 --- a/orc/opcodes.h +++ b/orc/opcodes.h @@ -121,6 +121,7 @@ UNARY_LW(select0lw, "(orc_uint32)%s & 0xffff") UNARY_LW(select1lw, "((orc_uint32)%s >> 16)&0xffff") UNARY_UW(swapw, "ORC_SWAP_W(%s)") UNARY_UL(swapl, "ORC_SWAP_L(%s)") +UNARY_UQ(swapq, "ORC_SWAP_Q(%s)") BINARY_F(addf, "ORC_DENORMAL(ORC_DENORMAL(%s) + ORC_DENORMAL(%s))") BINARY_F(subf, "ORC_DENORMAL(ORC_DENORMAL(%s) - ORC_DENORMAL(%s))") diff --git a/orc/orcemulateopcodes.c b/orc/orcemulateopcodes.c index 3bd7175..a0d95eb 100644 --- a/orc/orcemulateopcodes.c +++ b/orc/orcemulateopcodes.c @@ -33,6 +33,7 @@ #define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) #define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8)) #define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24)) +#define ORC_SWAP_Q(x) ((((x)&0xffU)<<56) | (((x)&0xff00U)<<40) | (((x)&0xff0000U)<<24) | (((x)&0xff000000U)<<8) | (((x)&0xff00000000U)>>8) | (((x)&0xff0000000000U)>>24) | (((x)&0xff000000000000U)>>40) | (((x)&0xff00000000000000U)>>56)) #define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset))) #define ORC_MIN_NORMAL (1.1754944909521339405e-38) #define ORC_RECAST_INT(x) (((orc_union32)(x)).i) @@ -3525,6 +3526,29 @@ emulate_swapl (OrcOpcodeExecutor *ex, int offset, int n) } void +emulate_swapq (OrcOpcodeExecutor *ex, int offset, int n) +{ + int i; + orc_union64 * ORC_RESTRICT ptr0; + const orc_union64 * ORC_RESTRICT ptr4; + orc_union64 var32; + orc_union64 var33; + + ptr0 = (orc_union64 *)ex->dest_ptrs[0]; + ptr4 = (orc_union64 *)ex->src_ptrs[0]; + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var32 = ptr4[i]; + /* 1: swapq */ + var33.i = ORC_SWAP_Q(var32.i); + /* 2: storeq */ + ptr0[i] = var33; + } + +} + +void emulate_select0wb (OrcOpcodeExecutor *ex, int offset, int n) { int i; diff --git a/orc/orcemulateopcodes.h b/orc/orcemulateopcodes.h index 5ea3a79..9ab736e 100644 --- a/orc/orcemulateopcodes.h +++ b/orc/orcemulateopcodes.h @@ -143,6 +143,7 @@ void emulate_accl (OrcOpcodeExecutor *ex, int i, int n); void emulate_accsadubl (OrcOpcodeExecutor *ex, int i, int n); void emulate_swapw (OrcOpcodeExecutor *ex, int i, int n); void emulate_swapl (OrcOpcodeExecutor *ex, int i, int n); +void emulate_swapq (OrcOpcodeExecutor *ex, int i, int n); void emulate_select0wb (OrcOpcodeExecutor *ex, int i, int n); void emulate_select1wb (OrcOpcodeExecutor *ex, int i, int n); void emulate_select0lw (OrcOpcodeExecutor *ex, int i, int n); diff --git a/orc/orcfunctions.c b/orc/orcfunctions.c index 24e84e2..80dc08f 100644 --- a/orc/orcfunctions.c +++ b/orc/orcfunctions.c @@ -79,6 +79,7 @@ void orc_memset (void * d1, int p1, int n); #define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) #define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8)) #define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24)) +#define ORC_SWAP_Q(x) ((((x)&0xffU)<<56) | (((x)&0xff00U)<<40) | (((x)&0xff0000U)<<24) | (((x)&0xff000000U)<<8) | (((x)&0xff00000000U)>>8) | (((x)&0xff0000000000U)>>24) | (((x)&0xff000000000000U)>>40) | (((x)&0xff00000000000000U)>>56)) #define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset))) #define ORC_MIN_NORMAL (1.1754944909521339405e-38) #define ORC_RECAST_INT(x) (((orc_union32)(x)).i) diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index dd9e766..6c2a0a6 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -442,6 +442,7 @@ static OrcStaticOpcode opcodes[] = { { "swapw", 0, { 2 }, { 2 }, emulate_swapw }, { "swapl", 0, { 4 }, { 4 }, emulate_swapl }, + { "swapq", 0, { 8 }, { 8 }, emulate_swapq }, { "select0wb", 0, { 1 }, { 2 }, emulate_select0wb }, { "select1wb", 0, { 1 }, { 2 }, emulate_select1wb }, { "select0lw", 0, { 2 }, { 4 }, emulate_select0lw }, diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index ac72f91..5513b40 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -101,6 +101,7 @@ orc_target_c_get_asm_preamble (void) "#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)\n" "#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))\n" "#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))\n" + "#define ORC_SWAP_Q(x) ((((x)&0xffU)<<56) | (((x)&0xff00U)<<40) | (((x)&0xff0000U)<<24) | (((x)&0xff000000U)<<8) | (((x)&0xff00000000U)>>8) | (((x)&0xff0000000000U)>>24) | (((x)&0xff000000000000U)>>40) | (((x)&0xff00000000000000U)>>56))\n" "#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))\n" "#define ORC_MIN_NORMAL (1.1754944909521339405e-38)\n" "#define ORC_RECAST_INT(x) (((orc_union32)(x)).i)\n" diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 24043c9..e6bbbea 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -1437,6 +1437,27 @@ sse_rule_swapl (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_por (p, tmp, dest); } +static void +sse_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn) +{ + int src = p->vars[insn->src_args[0]].alloc; + int dest = p->vars[insn->dest_args[0]].alloc; + int tmp = orc_compiler_get_temp_reg (p); + + orc_sse_emit_movdqa (p, src, tmp); + orc_sse_emit_psllq (p, 32, tmp); + orc_sse_emit_psrlq (p, 32, dest); + orc_sse_emit_por (p, tmp, dest); + orc_sse_emit_movdqa (p, dest, tmp); + orc_sse_emit_pslld (p, 16, tmp); + orc_sse_emit_psrld (p, 16, dest); + orc_sse_emit_por (p, tmp, dest); + orc_sse_emit_movdqa (p, dest, tmp); + orc_sse_emit_psllw (p, 8, tmp); + orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_por (p, tmp, dest); +} + #define LOAD_MASK_IS_SLOW #ifndef LOAD_MASK_IS_SLOW static void @@ -2193,6 +2214,7 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, "absl", sse_rule_absl_slow, NULL); orc_rule_register (rule_set, "swapw", sse_rule_swapw, NULL); orc_rule_register (rule_set, "swapl", sse_rule_swapl, NULL); + orc_rule_register (rule_set, "swapq", sse_rule_swapq, NULL); orc_rule_register (rule_set, "splitlw", sse_rule_splitlw, NULL); orc_rule_register (rule_set, "splitwb", sse_rule_splitwb, NULL); orc_rule_register (rule_set, "avgsl", sse_rule_avgsl, NULL); -- 2.7.4