UNARY_LW(select1lw, "((orc_uint32)%s >> 16)&0xffff")
UNARY_UW(swapw, "ORC_SWAP_W(%s)")
UNARY_UL(swapl, "ORC_SWAP_L(%s)")
+UNARY_UQ(swapq, "ORC_SWAP_Q(%s)")
BINARY_F(addf, "ORC_DENORMAL(ORC_DENORMAL(%s) + ORC_DENORMAL(%s))")
BINARY_F(subf, "ORC_DENORMAL(ORC_DENORMAL(%s) - ORC_DENORMAL(%s))")
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
+/* Reverse the byte order of a 64-bit value.  The masks are unsigned long
+ * long so the operand is converted to an unsigned 64-bit type before any
+ * shift: with plain unsigned-int masks a signed 64-bit operand stays signed
+ * and "<<56" / "<<40" can shift into the sign bit, which is undefined.
+ * Note that x is evaluated eight times; do not pass an expression with
+ * side effects. */
+#define ORC_SWAP_Q(x) ((((x)&0xffULL)<<56) | (((x)&0xff00ULL)<<40) | (((x)&0xff0000ULL)<<24) | (((x)&0xff000000ULL)<<8) | (((x)&0xff00000000ULL)>>8) | (((x)&0xff0000000000ULL)>>24) | (((x)&0xff000000000000ULL)>>40) | (((x)&0xff00000000000000ULL)>>56))
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
#define ORC_MIN_NORMAL (1.1754944909521339405e-38)
#define ORC_RECAST_INT(x) (((orc_union32)(x)).i)
}
+/*
+ * emulate_swapq: C emulation of the "swapq" opcode.
+ *
+ * For each of the n elements, reads a 64-bit value from the executor's
+ * first source array, byte-swaps its integer member with ORC_SWAP_Q and
+ * writes the result to the same index of the first destination array.
+ * The offset argument is not used here.
+ */
void
+emulate_swapq (OrcOpcodeExecutor *ex, int offset, int n)
+{
+  orc_union64 * ORC_RESTRICT dst;
+  const orc_union64 * ORC_RESTRICT src;
+  orc_union64 in;
+  orc_union64 out;
+  int k;
+
+  dst = (orc_union64 *)ex->dest_ptrs[0];
+  src = (orc_union64 *)ex->src_ptrs[0];
+
+  for (k = 0; k < n; k++) {
+    /* load the source element */
+    in = src[k];
+    /* reverse its eight bytes */
+    out.i = ORC_SWAP_Q(in.i);
+    /* store to the destination */
+    dst[k] = out;
+  }
+}
+
+void
emulate_select0wb (OrcOpcodeExecutor *ex, int offset, int n)
{
int i;
void emulate_accsadubl (OrcOpcodeExecutor *ex, int i, int n);
void emulate_swapw (OrcOpcodeExecutor *ex, int i, int n);
void emulate_swapl (OrcOpcodeExecutor *ex, int i, int n);
+void emulate_swapq (OrcOpcodeExecutor *ex, int i, int n);
void emulate_select0wb (OrcOpcodeExecutor *ex, int i, int n);
void emulate_select1wb (OrcOpcodeExecutor *ex, int i, int n);
void emulate_select0lw (OrcOpcodeExecutor *ex, int i, int n);
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
+/* Reverse the byte order of a 64-bit value.  The masks are unsigned long
+ * long so the operand is converted to an unsigned 64-bit type before any
+ * shift: with plain unsigned-int masks a signed 64-bit operand stays signed
+ * and "<<56" / "<<40" can shift into the sign bit, which is undefined.
+ * Note that x is evaluated eight times; do not pass an expression with
+ * side effects. */
+#define ORC_SWAP_Q(x) ((((x)&0xffULL)<<56) | (((x)&0xff00ULL)<<40) | (((x)&0xff0000ULL)<<24) | (((x)&0xff000000ULL)<<8) | (((x)&0xff00000000ULL)>>8) | (((x)&0xff0000000000ULL)>>24) | (((x)&0xff000000000000ULL)>>40) | (((x)&0xff00000000000000ULL)>>56))
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
#define ORC_MIN_NORMAL (1.1754944909521339405e-38)
#define ORC_RECAST_INT(x) (((orc_union32)(x)).i)
{ "swapw", 0, { 2 }, { 2 }, emulate_swapw },
{ "swapl", 0, { 4 }, { 4 }, emulate_swapl },
+ { "swapq", 0, { 8 }, { 8 }, emulate_swapq },
{ "select0wb", 0, { 1 }, { 2 }, emulate_select0wb },
{ "select1wb", 0, { 1 }, { 2 }, emulate_select1wb },
{ "select0lw", 0, { 2 }, { 4 }, emulate_select0lw },
"#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)\n"
"#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))\n"
"#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))\n"
+ /* ORC_SWAP_Q text uses ULL masks so the swap is done in unsigned 64-bit arithmetic (no signed left-shift overflow). */
+ "#define ORC_SWAP_Q(x) ((((x)&0xffULL)<<56) | (((x)&0xff00ULL)<<40) | (((x)&0xff0000ULL)<<24) | (((x)&0xff000000ULL)<<8) | (((x)&0xff00000000ULL)>>8) | (((x)&0xff0000000000ULL)>>24) | (((x)&0xff000000000000ULL)>>40) | (((x)&0xff00000000000000ULL)>>56))\n"
"#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))\n"
"#define ORC_MIN_NORMAL (1.1754944909521339405e-38)\n"
"#define ORC_RECAST_INT(x) (((orc_union32)(x)).i)\n"
orc_sse_emit_por (p, tmp, dest);
}
+/*
+ * sse_rule_swapq: emit the SSE instruction sequence for the "swapq" opcode.
+ *
+ * The sequence is three swap stages of decreasing granularity.  Each stage
+ * copies the working value into a temporary register, shifts the copy left
+ * and the working register right by half the element width, and ORs the
+ * two together:
+ *   1. 32-bit halves within each 64-bit element (psllq/psrlq by 32)
+ *   2. 16-bit halves within each 32-bit element (pslld/psrld by 16)
+ *   3.  8-bit halves within each 16-bit element (psllw/psrlw by 8)
+ * Together these are intended to reverse the eight bytes of each quadword.
+ *
+ * NOTE(review): stage 1 shifts dest without first copying src into it, so
+ * this presumably relies on dest already holding the source value (for
+ * example src and dest being allocated to the same register) -- confirm
+ * against the compiler's register allocation.
+ *
+ * The user argument is not used.
+ */
+static void
+sse_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+ int src = p->vars[insn->src_args[0]].alloc;
+ int dest = p->vars[insn->dest_args[0]].alloc;
+ int tmp = orc_compiler_get_temp_reg (p);
+
+ /* stage 1: exchange the 32-bit halves of each quadword */
+ orc_sse_emit_movdqa (p, src, tmp);
+ orc_sse_emit_psllq (p, 32, tmp);
+ orc_sse_emit_psrlq (p, 32, dest);
+ orc_sse_emit_por (p, tmp, dest);
+ /* stage 2: exchange the 16-bit halves of each doubleword */
+ orc_sse_emit_movdqa (p, dest, tmp);
+ orc_sse_emit_pslld (p, 16, tmp);
+ orc_sse_emit_psrld (p, 16, dest);
+ orc_sse_emit_por (p, tmp, dest);
+ /* stage 3: exchange the two bytes of each word */
+ orc_sse_emit_movdqa (p, dest, tmp);
+ orc_sse_emit_psllw (p, 8, tmp);
+ orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_por (p, tmp, dest);
+}
+
#define LOAD_MASK_IS_SLOW
#ifndef LOAD_MASK_IS_SLOW
static void
orc_rule_register (rule_set, "absl", sse_rule_absl_slow, NULL);
orc_rule_register (rule_set, "swapw", sse_rule_swapw, NULL);
orc_rule_register (rule_set, "swapl", sse_rule_swapl, NULL);
+ orc_rule_register (rule_set, "swapq", sse_rule_swapq, NULL);
orc_rule_register (rule_set, "splitlw", sse_rule_splitlw, NULL);
orc_rule_register (rule_set, "splitwb", sse_rule_splitwb, NULL);
orc_rule_register (rule_set, "avgsl", sse_rule_avgsl, NULL);