case 2:
orc_sse_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc);
orc_sse_emit_movdqa (compiler, dest->alloc, tmp);
- orc_sse_emit_psrlw (compiler, 8, tmp);
+ orc_sse_emit_psrlw_imm (compiler, 8, tmp);
break;
case 4:
orc_sse_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc);
#if 0
orc_sse_emit_movdqa (compiler, X86_XMM6, tmp);
- orc_sse_emit_pslld (compiler, 10, tmp);
- orc_sse_emit_psrld (compiler, 26, tmp);
- orc_sse_emit_pslld (compiler, 2, tmp);
+ orc_sse_emit_pslld_imm (compiler, 10, tmp);
+ orc_sse_emit_psrld_imm (compiler, 26, tmp);
+ orc_sse_emit_pslld_imm (compiler, 2, tmp);
orc_sse_emit_movdqa (compiler, tmp, tmp2);
- orc_sse_emit_pslld (compiler, 8, tmp2);
+ orc_sse_emit_pslld_imm (compiler, 8, tmp2);
orc_sse_emit_por (compiler, tmp2, tmp);
orc_sse_emit_movdqa (compiler, tmp, tmp2);
- orc_sse_emit_pslld (compiler, 16, tmp2);
+ orc_sse_emit_pslld_imm (compiler, 16, tmp2);
orc_sse_emit_por (compiler, tmp2, tmp);
#else
orc_sse_emit_movdqa (compiler, X86_XMM6, tmp);
orc_sse_emit_pshufb (compiler, tmp, dest->alloc);
orc_sse_emit_movdqa (compiler, X86_XMM7, tmp);
- orc_sse_emit_pslld (compiler, compiler->loop_shift, tmp);
+ orc_sse_emit_pslld_imm (compiler, compiler->loop_shift, tmp);
orc_sse_emit_paddd (compiler, tmp, X86_XMM6);
src->ptr_register, compiler->gp_tmpreg, 2, tmp, FALSE);
#ifdef MMX
//orc_mmx_emit_punpckldq (compiler, tmp, dest->alloc);
- orc_sse_emit_psllq (compiler, 8*4*i, tmp);
+ orc_sse_emit_psllq_imm (compiler, 8*4*i, tmp);
orc_sse_emit_por (compiler, tmp, dest->alloc);
#else
- orc_sse_emit_pslldq (compiler, 4*i, tmp);
+ orc_sse_emit_pslldq_imm (compiler, 4*i, tmp);
orc_sse_emit_por (compiler, tmp, dest->alloc);
#endif
}
orc_x86_emit_mov_reg_sse (compiler, src->ptr_offset, tmp);
orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(0,0,0,0), tmp, tmp);
- orc_sse_emit_psrlw (compiler, 8, tmp);
+ orc_sse_emit_psrlw_imm (compiler, 8, tmp);
orc_sse_emit_pmullw (compiler, tmp2, tmp);
- orc_sse_emit_psraw (compiler, 8, tmp);
+ orc_sse_emit_psraw_imm (compiler, 8, tmp);
orc_sse_emit_pxor (compiler, tmp2, tmp2);
orc_sse_emit_packsswb (compiler, tmp2, tmp);
orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(1,1,0,0), tmp4, tmp4);
orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,1,0,0), tmp4, tmp4);
#endif
- orc_sse_emit_psrlw (compiler, 8, tmp4);
+ orc_sse_emit_psrlw_imm (compiler, 8, tmp4);
orc_sse_emit_pmullw (compiler, tmp4, tmp2);
- orc_sse_emit_psraw (compiler, 8, tmp2);
+ orc_sse_emit_psraw_imm (compiler, 8, tmp2);
orc_sse_emit_pxor (compiler, tmp, tmp);
orc_sse_emit_packsswb (compiler, tmp, tmp2);
if (i != 0) {
- orc_sse_emit_pslldq (compiler, 8, tmp2);
+ orc_sse_emit_pslldq_imm (compiler, 8, tmp2);
}
orc_sse_emit_paddb (compiler, tmp2, dest->alloc);
orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp);
orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp);
- orc_mmx_emit_psrlw (compiler, 8, tmp);
+ orc_mmx_emit_psrlw_imm (compiler, 8, tmp);
orc_mmx_emit_pmullw (compiler, tmp2, tmp);
- orc_mmx_emit_psraw (compiler, 8, tmp);
+ orc_mmx_emit_psraw_imm (compiler, 8, tmp);
orc_mmx_emit_pxor (compiler, tmp2, tmp2);
orc_mmx_emit_packsswb (compiler, tmp2, tmp);
orc_x86_emit_mov_memoffset_mmx (compiler, 4, 0,
src->ptr_register, tmp2, FALSE);
orc_mmx_emit_paddb (compiler, tmp, tmp2);
- orc_mmx_emit_psllq (compiler, 32, tmp2);
+ orc_mmx_emit_psllq_imm (compiler, 32, tmp2);
orc_mmx_emit_por (compiler, tmp2, dest->alloc);
}
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_660f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[0]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_660f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[1]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
-UNARY(absb,"pabsb",0x381c)
-BINARY(addb,"paddb",0xfc)
-BINARY(addssb,"paddsb",0xec)
-BINARY(addusb,"paddusb",0xdc)
-BINARY(andb,"pand",0xdb)
-BINARY(andnb,"pandn",0xdf)
-BINARY(avgub,"pavgb",0xe0)
-BINARY(cmpeqb,"pcmpeqb",0x74)
-BINARY(cmpgtsb,"pcmpgtb",0x64)
-BINARY(maxsb,"pmaxsb",0x383c)
-BINARY(maxub,"pmaxub",0xde)
-BINARY(minsb,"pminsb",0x3838)
-BINARY(minub,"pminub",0xda)
-//BINARY(mullb,"pmullb",0xd5)
-//BINARY(mulhsb,"pmulhb",0xe5)
-//BINARY(mulhub,"pmulhub",0xe4)
-BINARY(orb,"por",0xeb)
-//UNARY(signb,"psignb",0x3808)
-BINARY(subb,"psubb",0xf8)
-BINARY(subssb,"psubsb",0xe8)
-BINARY(subusb,"psubusb",0xd8)
-BINARY(xorb,"pxor",0xef)
-
-UNARY(absw,"pabsw",0x381d)
-BINARY(addw,"paddw",0xfd)
-BINARY(addssw,"paddsw",0xed)
-BINARY(addusw,"paddusw",0xdd)
-BINARY(andw,"pand",0xdb)
-BINARY(andnw,"pandn",0xdf)
-BINARY(avguw,"pavgw",0xe3)
-BINARY(cmpeqw,"pcmpeqw",0x75)
-BINARY(cmpgtsw,"pcmpgtw",0x65)
-BINARY(maxsw,"pmaxsw",0xee)
-BINARY(maxuw,"pmaxuw",0x383e)
-BINARY(minsw,"pminsw",0xea)
-BINARY(minuw,"pminuw",0x383a)
-BINARY(mullw,"pmullw",0xd5)
-BINARY(mulhsw,"pmulhw",0xe5)
-BINARY(mulhuw,"pmulhuw",0xe4)
-BINARY(orw,"por",0xeb)
-//UNARY(signw,"psignw",0x3809)
-BINARY(subw,"psubw",0xf9)
-BINARY(subssw,"psubsw",0xe9)
-BINARY(subusw,"psubusw",0xd9)
-BINARY(xorw,"pxor",0xef)
-
-UNARY(absl,"pabsd",0x381e)
-BINARY(addl,"paddd",0xfe)
-//BINARY(addssl,"paddsd",0xed)
-//BINARY(addusl,"paddusd",0xdd)
-BINARY(andl,"pand",0xdb)
-BINARY(andnl,"pandn",0xdf)
-//BINARY(avgul,"pavgd",0xe3)
-BINARY(cmpeql,"pcmpeqd",0x76)
-BINARY(cmpgtsl,"pcmpgtd",0x66)
-BINARY(maxsl,"pmaxsd",0x383d)
-BINARY(maxul,"pmaxud",0x383f)
-BINARY(minsl,"pminsd",0x3839)
-BINARY(minul,"pminud",0x383b)
-BINARY(mulll,"pmulld",0x3840)
-//BINARY(mulhsl,"pmulhd",0xe5)
-//BINARY(mulhul,"pmulhud",0xe4)
-BINARY(orl,"por",0xeb)
-//UNARY(signl,"psignd",0x380a)
-BINARY(subl,"psubd",0xfa)
-//BINARY(subssl,"psubsd",0xe9)
-//BINARY(subusl,"psubusd",0xd9)
-BINARY(xorl,"pxor",0xef)
-
-BINARY(andq,"pand",0xdb)
-BINARY(andnq,"pandn",0xdf)
-BINARY(orq,"por",0xeb)
-BINARY(xorq,"pxor",0xef)
-BINARY(cmpeqq,"pcmpeqq",0x3829)
-BINARY(cmpgtsq,"pcmpgtq",0x3837)
+UNARY(absb,pabsb,0x381c)
+BINARY(addb,paddb,0xfc)
+BINARY(addssb,paddsb,0xec)
+BINARY(addusb,paddusb,0xdc)
+BINARY(andb,pand,0xdb)
+BINARY(andnb,pandn,0xdf)
+BINARY(avgub,pavgb,0xe0)
+BINARY(cmpeqb,pcmpeqb,0x74)
+BINARY(cmpgtsb,pcmpgtb,0x64)
+BINARY(maxsb,pmaxsb,0x383c)
+BINARY(maxub,pmaxub,0xde)
+BINARY(minsb,pminsb,0x3838)
+BINARY(minub,pminub,0xda)
+//BINARY(mullb,pmullb,0xd5)
+//BINARY(mulhsb,pmulhb,0xe5)
+//BINARY(mulhub,pmulhub,0xe4)
+BINARY(orb,por,0xeb)
+//UNARY(signb,psignb,0x3808)
+BINARY(subb,psubb,0xf8)
+BINARY(subssb,psubsb,0xe8)
+BINARY(subusb,psubusb,0xd8)
+BINARY(xorb,pxor,0xef)
+
+UNARY(absw,pabsw,0x381d)
+BINARY(addw,paddw,0xfd)
+BINARY(addssw,paddsw,0xed)
+BINARY(addusw,paddusw,0xdd)
+BINARY(andw,pand,0xdb)
+BINARY(andnw,pandn,0xdf)
+BINARY(avguw,pavgw,0xe3)
+BINARY(cmpeqw,pcmpeqw,0x75)
+BINARY(cmpgtsw,pcmpgtw,0x65)
+BINARY(maxsw,pmaxsw,0xee)
+BINARY(maxuw,pmaxuw,0x383e)
+BINARY(minsw,pminsw,0xea)
+BINARY(minuw,pminuw,0x383a)
+BINARY(mullw,pmullw,0xd5)
+BINARY(mulhsw,pmulhw,0xe5)
+BINARY(mulhuw,pmulhuw,0xe4)
+BINARY(orw,por,0xeb)
+//UNARY(signw,psignw,0x3809)
+BINARY(subw,psubw,0xf9)
+BINARY(subssw,psubsw,0xe9)
+BINARY(subusw,psubusw,0xd9)
+BINARY(xorw,pxor,0xef)
+
+UNARY(absl,pabsd,0x381e)
+BINARY(addl,paddd,0xfe)
+//BINARY(addssl,paddsd,0xed)
+//BINARY(addusl,paddusd,0xdd)
+BINARY(andl,pand,0xdb)
+BINARY(andnl,pandn,0xdf)
+//BINARY(avgul,pavgd,0xe3)
+BINARY(cmpeql,pcmpeqd,0x76)
+BINARY(cmpgtsl,pcmpgtd,0x66)
+BINARY(maxsl,pmaxsd,0x383d)
+BINARY(maxul,pmaxud,0x383f)
+BINARY(minsl,pminsd,0x3839)
+BINARY(minul,pminud,0x383b)
+BINARY(mulll,pmulld,0x3840)
+//BINARY(mulhsl,pmulhd,0xe5)
+//BINARY(mulhul,pmulhud,0xe4)
+BINARY(orl,por,0xeb)
+//UNARY(signl,psignd,0x380a)
+BINARY(subl,psubd,0xfa)
+//BINARY(subssl,psubsd,0xe9)
+//BINARY(subusl,psubusd,0xd9)
+BINARY(xorl,pxor,0xef)
+
+BINARY(andq,pand,0xdb)
+BINARY(andnq,pandn,0xdf)
+BINARY(orq,por,0xeb)
+BINARY(xorq,pxor,0xef)
+BINARY(cmpeqq,pcmpeqq,0x3829)
+BINARY(cmpgtsq,pcmpgtq,0x3837)
#ifndef MMX
-BINARY(addq,"paddq",0xd4)
-BINARY(subq,"psubq",0xfb)
+BINARY(addq,paddq,0xd4)
+BINARY(subq,psubq,0xfb)
#endif
static void
#ifndef MMX
if (p->loop_shift == 0) {
- orc_sse_emit_pslldq (p, 12, src);
+ orc_sse_emit_pslldq_imm (p, 12, src);
}
#endif
orc_sse_emit_paddd (p, src, dest);
#ifndef MMX
if (p->loop_shift <= 2) {
orc_sse_emit_movdqa (p, src1, tmp);
- orc_sse_emit_pslldq (p, 16 - (1<<p->loop_shift), tmp);
+ orc_sse_emit_pslldq_imm (p, 16 - (1<<p->loop_shift), tmp);
orc_sse_emit_movdqa (p, src2, tmp2);
- orc_sse_emit_pslldq (p, 16 - (1<<p->loop_shift), tmp2);
+ orc_sse_emit_pslldq_imm (p, 16 - (1<<p->loop_shift), tmp2);
orc_sse_emit_psadbw (p, tmp2, tmp);
} else if (p->loop_shift == 3) {
orc_sse_emit_movdqa (p, src1, tmp);
orc_sse_emit_psadbw (p, src2, tmp);
- orc_sse_emit_pslldq (p, 8, tmp);
+ orc_sse_emit_pslldq_imm (p, 8, tmp);
} else {
orc_sse_emit_movdqa (p, src1, tmp);
orc_sse_emit_psadbw (p, src2, tmp);
#else
if (p->loop_shift <= 2) {
orc_sse_emit_movdqa (p, src1, tmp);
- orc_sse_emit_psllq (p, 8*(8 - (1<<p->loop_shift)), tmp);
+ orc_sse_emit_psllq_imm (p, 8*(8 - (1<<p->loop_shift)), tmp);
orc_sse_emit_movdqa (p, src2, tmp2);
- orc_sse_emit_psllq (p, 8*(8 - (1<<p->loop_shift)), tmp2);
+ orc_sse_emit_psllq_imm (p, 8*(8 - (1<<p->loop_shift)), tmp2);
orc_sse_emit_psadbw (p, tmp2, tmp);
} else {
orc_sse_emit_movdqa (p, src1, tmp);
{
int src = p->vars[insn->src_args[0]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
- const char * names[] = { "psignb", "psignw", "psignd" };
- int codes[] = { 0x3808, 0x3809, 0x380a };
+ int opcodes[] = { ORC_X86_psignb, ORC_X86_psignw, ORC_X86_psignd };
int type = ORC_PTR_TO_INT(user);
int tmpc;
tmpc = orc_compiler_get_temp_constant (p, 1<<type, 1);
if (src == dest) {
- orc_sse_emit_660f (p, names[type], codes[type], src, tmpc);
+ orc_sse_emit_sysinsn (p, opcodes[type], src, tmpc);
orc_sse_emit_movdqa (p, tmpc, dest);
} else {
/* FIXME this would be a good opportunity to not chain src to dest */
orc_sse_emit_movdqa (p, tmpc, dest);
- orc_sse_emit_660f (p, names[type], codes[type], src, dest);
+ orc_sse_emit_sysinsn (p, opcodes[type], src, dest);
}
}
orc_sse_emit_movdqa (p, tmp, dest);
}
- orc_sse_emit_psraw (p, 15, tmp);
+ orc_sse_emit_psraw_imm (p, 15, tmp);
orc_sse_emit_pxor (p, tmp, dest);
orc_sse_emit_psubw (p, tmp, dest);
orc_sse_emit_movdqa (p, tmp, dest);
}
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
orc_sse_emit_pxor (p, tmp, dest);
orc_sse_emit_psubd (p, tmp, dest);
sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
{
int type = ORC_PTR_TO_INT(user);
- int imm_code1[] = { 0x71, 0x71, 0x71, 0x72, 0x72, 0x72, 0x73, 0x73 };
- int imm_code2[] = { 6, 2, 4, 6, 2, 4, 6, 2 };
- int reg_code[] = { 0xf1, 0xd1, 0xe1, 0xf2, 0xd2, 0xe2, 0xf3, 0xd3 };
- const char *code[] = { "psllw", "psrlw", "psraw", "pslld", "psrld", "psrad", "psllq", "psrlq" };
+ /* Both tables are indexed by `type` and list the same shifts in the same
+  * order.  opcodes[] is used when the shift count comes from a parameter
+  * loaded into a register; opcodes_imm[] is used when the shift count is a
+  * compile-time constant. */
+ const int opcodes[] = { ORC_X86_psllw, ORC_X86_psrlw, ORC_X86_psraw,
+ ORC_X86_pslld, ORC_X86_psrld, ORC_X86_psrad, ORC_X86_psllq,
+ ORC_X86_psrlq };
+ const int opcodes_imm[] = { ORC_X86_psllw_imm, ORC_X86_psrlw_imm,
+ ORC_X86_psraw_imm, ORC_X86_pslld_imm, ORC_X86_psrld_imm,
+ ORC_X86_psrad_imm, ORC_X86_psllq_imm, ORC_X86_psrlq_imm };
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_sse_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type],
+ orc_sse_emit_sysinsn (p, opcodes_imm[type],
p->vars[insn->src_args[1]].value.i,
p->vars[insn->dest_args[0]].alloc);
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
p->exec_reg, tmp, FALSE);
- orc_sse_emit_660f (p, code[type], reg_code[type], tmp,
+ orc_sse_emit_sysinsn (p, opcodes[type], tmp,
p->vars[insn->dest_args[0]].alloc);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants or params");
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_sse_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest);
+ orc_sse_emit_psllw_imm (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
0xff&(0xff<<p->vars[insn->src_args[1]].value.i));
orc_sse_emit_pand (p, tmp, dest);
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psraw (p, p->vars[insn->src_args[1]].value.i, tmp);
- orc_sse_emit_psrlw (p, 8, tmp);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psraw_imm (p, p->vars[insn->src_args[1]].value.i, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, tmp);
- orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest);
- orc_sse_emit_psllw (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8 + p->vars[insn->src_args[1]].value.i, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
orc_sse_emit_por (p, tmp, dest);
} else {
int tmp;
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest);
+ orc_sse_emit_psrlw_imm (p, p->vars[insn->src_args[1]].value.i, dest);
tmp = orc_compiler_get_constant (p, 1,
(0xff>>p->vars[insn->src_args[1]].value.i));
orc_sse_emit_pand (p, tmp, dest);
#else
orc_mmx_emit_pshufw (p, ORC_MMX_SHUF(3,2,3,2), src, tmp);
#endif
- orc_sse_emit_psrad (p, 31, tmp);
- orc_sse_emit_psllq (p, 64-p->vars[insn->src_args[1]].value.i, tmp);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
+ orc_sse_emit_psllq_imm (p, 64-p->vars[insn->src_args[1]].value.i, tmp);
- orc_sse_emit_psrlq (p, p->vars[insn->src_args[1]].value.i, dest);
+ orc_sse_emit_psrlq_imm (p, p->vars[insn->src_args[1]].value.i, dest);
orc_sse_emit_por (p, tmp, dest);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants");
int dest = p->vars[insn->dest_args[0]].alloc;
orc_sse_emit_punpcklbw (p, src, dest);
- orc_sse_emit_psraw (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8, dest);
}
static void
/* FIXME need a zero register */
if (0) {
orc_sse_emit_punpcklbw (p, src, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
} else {
orc_sse_emit_pxor(p, tmp, tmp);
orc_sse_emit_punpcklbw (p, tmp, dest);
orc_sse_emit_movdqa (p, src, tmp);
orc_sse_emit_movdqa (p, src, dest);
- orc_sse_emit_psrlw (p, 15, tmp);
- orc_sse_emit_psllw (p, 14, tmp);
+ orc_sse_emit_psrlw_imm (p, 15, tmp);
+ orc_sse_emit_psllw_imm (p, 14, tmp);
orc_sse_emit_por (p, tmp, dest);
- orc_sse_emit_psllw (p, 1, tmp);
+ orc_sse_emit_psllw_imm (p, 1, tmp);
orc_sse_emit_pxor (p, tmp, dest);
orc_sse_emit_packuswb (p, dest, dest);
}
{
int dest = p->vars[insn->dest_args[0]].alloc;
- orc_sse_emit_psllw (p, 8, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_packuswb (p, dest, dest);
}
{
int dest = p->vars[insn->dest_args[0]].alloc;
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_packuswb (p, dest, dest);
}
int dest = p->vars[insn->dest_args[0]].alloc;
orc_sse_emit_punpcklwd (p, src, dest);
- orc_sse_emit_psrad (p, 16, dest);
+ orc_sse_emit_psrad_imm (p, 16, dest);
}
static void
/* FIXME need a zero register */
if (0) {
orc_sse_emit_punpcklwd (p, src, dest);
- orc_sse_emit_psrld (p, 16, dest);
+ orc_sse_emit_psrld_imm (p, 16, dest);
} else {
orc_sse_emit_pxor(p, tmp, tmp);
orc_sse_emit_punpcklwd (p, tmp, dest);
{
int dest = p->vars[insn->dest_args[0]].alloc;
- orc_sse_emit_pslld (p, 16, dest);
- orc_sse_emit_psrad (p, 16, dest);
+ orc_sse_emit_pslld_imm (p, 16, dest);
+ orc_sse_emit_psrad_imm (p, 16, dest);
orc_sse_emit_packssdw (p, dest, dest);
}
{
int dest = p->vars[insn->dest_args[0]].alloc;
- orc_sse_emit_psrad (p, 16, dest);
+ orc_sse_emit_psrad_imm (p, 16, dest);
orc_sse_emit_packssdw (p, dest, dest);
}
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
orc_sse_emit_punpckldq (p, tmp, dest);
}
tmpc = orc_compiler_get_constant (p, 2, 0x0080);
orc_sse_emit_paddw (p, tmpc, dest);
orc_sse_emit_movdqa (p, dest, tmp);
- orc_sse_emit_psrlw (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, tmp);
orc_sse_emit_paddw (p, tmp, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
}
#if 1
int i;
orc_sse_emit_movdqa (p, src, divisor);
- orc_sse_emit_psllw (p, 8, divisor);
- orc_sse_emit_psrlw (p, 1, divisor);
+ orc_sse_emit_psllw_imm (p, 8, divisor);
+ orc_sse_emit_psrlw_imm (p, 1, divisor);
orc_sse_load_constant (p, a, 2, 0x00ff);
tmp = orc_compiler_get_constant (p, 2, 0x8000);
orc_sse_emit_movdqa (p, tmp, j);
- orc_sse_emit_psrlw (p, 8, j);
+ orc_sse_emit_psrlw_imm (p, 8, j);
orc_sse_emit_pxor (p, tmp, dest);
orc_sse_emit_movdqa (p, l, j2);
orc_sse_emit_pandn (p, divisor, l);
orc_sse_emit_psubw (p, l, dest);
- orc_sse_emit_psrlw (p, 1, divisor);
+ orc_sse_emit_psrlw_imm (p, 1, divisor);
orc_sse_emit_pand (p, j, j2);
orc_sse_emit_pxor (p, j2, a);
- orc_sse_emit_psrlw (p, 1, j);
+ orc_sse_emit_psrlw_imm (p, 1, j);
}
orc_sse_emit_movdqa (p, divisor, l);
orc_sse_emit_pxor (p, a, a);
orc_sse_emit_movdqa (p, tmp, j);
- orc_sse_emit_psrlw (p, 8, j);
+ orc_sse_emit_psrlw_imm (p, 8, j);
for(i=0;i<8;i++){
orc_sse_emit_por (p, j, a);
orc_sse_emit_pcmpgtw (p, b, k);
orc_sse_emit_pand (p, j, k);
orc_sse_emit_pxor (p, k, a);
- orc_sse_emit_psrlw (p, 1, j);
+ orc_sse_emit_psrlw_imm (p, 1, j);
}
orc_sse_emit_movdqa (p, a, dest);
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_punpcklbw (p, src, tmp);
- orc_sse_emit_psraw (p, 8, tmp);
+ orc_sse_emit_psraw_imm (p, 8, tmp);
orc_sse_emit_punpcklbw (p, dest, dest);
- orc_sse_emit_psraw (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8, dest);
orc_sse_emit_pmullw (p, tmp, dest);
}
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_punpcklbw (p, src, tmp);
- orc_sse_emit_psrlw (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, tmp);
orc_sse_emit_punpcklbw (p, dest, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_pmullw (p, tmp, dest);
}
orc_sse_emit_movdqa (p, dest, tmp);
orc_sse_emit_pmullw (p, src, dest);
- orc_sse_emit_psllw (p, 8, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_movdqa (p, src, tmp2);
- orc_sse_emit_psraw (p, 8, tmp2);
- orc_sse_emit_psraw (p, 8, tmp);
+ orc_sse_emit_psraw_imm (p, 8, tmp2);
+ orc_sse_emit_psraw_imm (p, 8, tmp);
orc_sse_emit_pmullw (p, tmp2, tmp);
- orc_sse_emit_psllw (p, 8, tmp);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
orc_sse_emit_por (p, tmp, dest);
}
orc_sse_emit_movdqa (p, src, tmp);
orc_sse_emit_movdqa (p, dest, tmp2);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psraw (p, 8, tmp);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psraw_imm (p, 8, tmp);
- orc_sse_emit_psllw (p, 8, dest);
- orc_sse_emit_psraw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8, dest);
orc_sse_emit_pmullw (p, tmp, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psraw (p, 8, tmp);
- orc_sse_emit_psraw (p, 8, tmp2);
+ orc_sse_emit_psraw_imm (p, 8, tmp);
+ orc_sse_emit_psraw_imm (p, 8, tmp2);
orc_sse_emit_pmullw (p, tmp, tmp2);
- orc_sse_emit_psrlw (p, 8, tmp2);
- orc_sse_emit_psllw (p, 8, tmp2);
+ orc_sse_emit_psrlw_imm (p, 8, tmp2);
+ orc_sse_emit_psllw_imm (p, 8, tmp2);
orc_sse_emit_por (p, tmp2, dest);
}
orc_sse_emit_movdqa (p, src, tmp);
orc_sse_emit_movdqa (p, dest, tmp2);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psrlw (p, 8, tmp);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, tmp);
- orc_sse_emit_psllw (p, 8, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_pmullw (p, tmp, dest);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psrlw (p, 8, tmp);
- orc_sse_emit_psrlw (p, 8, tmp2);
+ orc_sse_emit_psrlw_imm (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, tmp2);
orc_sse_emit_pmullw (p, tmp, tmp2);
- orc_sse_emit_psrlw (p, 8, tmp2);
- orc_sse_emit_psllw (p, 8, tmp2);
+ orc_sse_emit_psrlw_imm (p, 8, tmp2);
+ orc_sse_emit_psllw_imm (p, 8, tmp2);
orc_sse_emit_por (p, tmp2, dest);
}
/* FIXME slow */
/* same as convlw */
- orc_sse_emit_pslld (p, 16, dest);
- orc_sse_emit_psrad (p, 16, dest);
+ orc_sse_emit_pslld_imm (p, 16, dest);
+ orc_sse_emit_psrad_imm (p, 16, dest);
orc_sse_emit_packssdw (p, dest, dest);
}
/* FIXME slow */
- orc_sse_emit_psrad (p, 16, dest);
+ orc_sse_emit_psrad_imm (p, 16, dest);
orc_sse_emit_packssdw (p, dest, dest);
}
int src = p->vars[insn->src_args[0]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
- orc_sse_emit_psrlq (p, 32, dest);
+ orc_sse_emit_psrlq_imm (p, 32, dest);
#ifndef MMX
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,2,0), src, dest);
#else
/* FIXME slow */
/* same as convwb */
- orc_sse_emit_psllw (p, 8, dest);
- orc_sse_emit_psraw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8, dest);
orc_sse_emit_packsswb (p, dest, dest);
}
/* FIXME slow */
- orc_sse_emit_psraw (p, 8, dest);
+ orc_sse_emit_psraw_imm (p, 8, dest);
orc_sse_emit_packsswb (p, dest, dest);
}
/* FIXME slow */
- orc_sse_emit_psrad (p, 16, dest1);
+ orc_sse_emit_psrad_imm (p, 16, dest1);
orc_sse_emit_packssdw (p, dest1, dest1);
if (dest2 != src) {
orc_sse_emit_movdqa (p, src, dest2);
}
- orc_sse_emit_pslld (p, 16, dest2);
- orc_sse_emit_psrad (p, 16, dest2);
+ orc_sse_emit_pslld_imm (p, 16, dest2);
+ orc_sse_emit_psrad_imm (p, 16, dest2);
orc_sse_emit_packssdw (p, dest2, dest2);
}
/* FIXME slow */
- orc_sse_emit_psraw (p, 8, dest1);
+ orc_sse_emit_psraw_imm (p, 8, dest1);
orc_sse_emit_packsswb (p, dest1, dest1);
if (dest2 != src) {
}
#if 0
- orc_sse_emit_psllw (p, 8, dest2);
- orc_sse_emit_psraw (p, 8, dest2);
+ orc_sse_emit_psllw_imm (p, 8, dest2);
+ orc_sse_emit_psraw_imm (p, 8, dest2);
orc_sse_emit_packsswb (p, dest2, dest2);
#else
orc_sse_emit_pand (p, tmp, dest2);
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_por (p, tmp, dest);
}
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_pslld (p, 16, tmp);
- orc_sse_emit_psrld (p, 16, dest);
+ orc_sse_emit_pslld_imm (p, 16, tmp);
+ orc_sse_emit_psrld_imm (p, 16, dest);
orc_sse_emit_por (p, tmp, dest);
orc_sse_emit_movdqa (p, dest, tmp);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_por (p, tmp, dest);
}
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_pslld (p, 16, tmp);
- orc_sse_emit_psrld (p, 16, dest);
+ orc_sse_emit_pslld_imm (p, 16, tmp);
+ orc_sse_emit_psrld_imm (p, 16, dest);
orc_sse_emit_por (p, tmp, dest);
}
int tmp = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_psllq (p, 32, tmp);
- orc_sse_emit_psrlq (p, 32, dest);
+ orc_sse_emit_psllq_imm (p, 32, tmp);
+ orc_sse_emit_psrlq_imm (p, 32, dest);
orc_sse_emit_por (p, tmp, dest);
orc_sse_emit_movdqa (p, dest, tmp);
- orc_sse_emit_pslld (p, 16, tmp);
- orc_sse_emit_psrld (p, 16, dest);
+ orc_sse_emit_pslld_imm (p, 16, tmp);
+ orc_sse_emit_psrld_imm (p, 16, dest);
orc_sse_emit_por (p, tmp, dest);
orc_sse_emit_movdqa (p, dest, tmp);
- orc_sse_emit_psllw (p, 8, tmp);
- orc_sse_emit_psrlw (p, 8, dest);
+ orc_sse_emit_psllw_imm (p, 8, tmp);
+ orc_sse_emit_psrlw_imm (p, 8, dest);
orc_sse_emit_por (p, tmp, dest);
}
orc_sse_emit_movdqa (p, dest, tmp);
orc_sse_emit_pxor(p, src, tmp);
- orc_sse_emit_psrad(p, 1, tmp);
+ orc_sse_emit_psrad_imm(p, 1, tmp);
orc_sse_emit_por(p, src, dest);
orc_sse_emit_psubd(p, tmp, dest);
orc_sse_emit_movdqa (p, dest, tmp);
orc_sse_emit_pxor(p, src, tmp);
- orc_sse_emit_psrld(p, 1, tmp);
+ orc_sse_emit_psrld_imm(p, 1, tmp);
orc_sse_emit_por(p, src, dest);
orc_sse_emit_psubd(p, tmp, dest);
orc_sse_emit_movdqa (p, src, tmp2);
orc_sse_emit_pxor (p, dest, tmp2);
- orc_sse_emit_psrad (p, 1, tmp2);
+ orc_sse_emit_psrad_imm (p, 1, tmp2);
orc_sse_emit_paddd (p, tmp2, tmp);
- orc_sse_emit_psrad (p, 30, tmp);
- orc_sse_emit_pslld (p, 30, tmp);
+ orc_sse_emit_psrad_imm (p, 30, tmp);
+ orc_sse_emit_pslld_imm (p, 30, tmp);
orc_sse_emit_movdqa (p, tmp, tmp2);
- orc_sse_emit_pslld (p, 1, tmp2);
+ orc_sse_emit_pslld_imm (p, 1, tmp2);
orc_sse_emit_movdqa (p, tmp, tmp3);
orc_sse_emit_pxor (p, tmp2, tmp3);
- orc_sse_emit_psrad (p, 31, tmp3);
+ orc_sse_emit_psrad_imm (p, 31, tmp3);
- orc_sse_emit_psrad (p, 31, tmp2);
+ orc_sse_emit_psrad_imm (p, 31, tmp2);
tmp = orc_compiler_get_constant (p, 4, 0x80000000);
orc_sse_emit_pxor (p, tmp, tmp2); // clamped value
orc_sse_emit_pand (p, tmp3, tmp2);
orc_sse_emit_pxor (p, tmp, t);
orc_sse_emit_por (p, t, s);
orc_sse_emit_movdqa (p, src, t);
- orc_sse_emit_psrad (p, 31, s);
- orc_sse_emit_psrad (p, 31, t);
+ orc_sse_emit_psrad_imm (p, 31, s);
+ orc_sse_emit_psrad_imm (p, 31, t);
orc_sse_emit_pand (p, s, dest);
tmp = orc_compiler_get_constant (p, 4, 0x7fffffff);
orc_sse_emit_pxor (p, tmp, t);
orc_sse_emit_por (p, dest, tmp);
orc_sse_emit_pxor (p, dest, tmp2);
- orc_sse_emit_psrad (p, 1, tmp2);
+ orc_sse_emit_psrad_imm (p, 1, tmp2);
orc_sse_emit_psubd (p, tmp2, tmp);
- orc_sse_emit_psrad (p, 30, tmp);
- orc_sse_emit_pslld (p, 30, tmp);
+ orc_sse_emit_psrad_imm (p, 30, tmp);
+ orc_sse_emit_pslld_imm (p, 30, tmp);
orc_sse_emit_movdqa (p, tmp, tmp2);
- orc_sse_emit_pslld (p, 1, tmp2);
+ orc_sse_emit_pslld_imm (p, 1, tmp2);
orc_sse_emit_movdqa (p, tmp, tmp3);
orc_sse_emit_pxor (p, tmp2, tmp3);
- orc_sse_emit_psrad (p, 31, tmp3); // tmp3 is mask: ~0 is for clamping
+ orc_sse_emit_psrad_imm (p, 31, tmp3); // tmp3 is mask: ~0 is for clamping
- orc_sse_emit_psrad (p, 31, tmp2);
+ orc_sse_emit_psrad_imm (p, 31, tmp2);
tmp = orc_compiler_get_constant (p, 4, 0x80000000);
orc_sse_emit_pxor (p, tmp, tmp2); // clamped value
orc_sse_emit_pand (p, tmp3, tmp2);
/* Compute the bit that gets carried from bit 0 to bit 1 */
orc_sse_emit_movdqa (p, src, tmp);
orc_sse_emit_pand (p, dest, tmp);
- orc_sse_emit_pslld (p, 31, tmp);
- orc_sse_emit_psrld (p, 31, tmp);
+ orc_sse_emit_pslld_imm (p, 31, tmp);
+ orc_sse_emit_psrld_imm (p, 31, tmp);
/* Add in (src>>1) */
orc_sse_emit_movdqa (p, src, tmp2);
- orc_sse_emit_psrld (p, 1, tmp2);
+ orc_sse_emit_psrld_imm (p, 1, tmp2);
orc_sse_emit_paddd (p, tmp2, tmp);
/* Add in (dest>>1) */
orc_sse_emit_movdqa (p, dest, tmp2);
- orc_sse_emit_psrld (p, 1, tmp2);
+ orc_sse_emit_psrld_imm (p, 1, tmp2);
orc_sse_emit_paddd (p, tmp2, tmp);
/* turn overflow bit into mask */
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
/* compute the sum, then or over the mask */
orc_sse_emit_paddd (p, src, dest);
orc_sse_emit_movdqa (p, src, tmp2);
orc_sse_emit_pxor (p, dest, tmp2);
- orc_sse_emit_psrld (p, 1, tmp2);
+ orc_sse_emit_psrld_imm (p, 1, tmp2);
orc_sse_emit_paddd (p, tmp2, tmp);
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
orc_sse_emit_paddd (p, src, dest);
orc_sse_emit_por (p, tmp, dest);
}
int tmp2 = orc_compiler_get_temp_reg (p);
orc_sse_emit_movdqa (p, src, tmp2);
- orc_sse_emit_psrld (p, 1, tmp2);
+ orc_sse_emit_psrld_imm (p, 1, tmp2);
orc_sse_emit_movdqa (p, dest, tmp);
- orc_sse_emit_psrld (p, 1, tmp);
+ orc_sse_emit_psrld_imm (p, 1, tmp);
orc_sse_emit_psubd (p, tmp, tmp2);
/* turn overflow bit into mask */
- orc_sse_emit_psrad (p, 31, tmp2);
+ orc_sse_emit_psrad_imm (p, 31, tmp2);
/* compute the difference, then and over the mask */
orc_sse_emit_psubd (p, src, dest);
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_0f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[0]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_0f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[1]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
-BINARY_F(addf, "addps", 0x58)
-BINARY_F(subf, "subps", 0x5c)
-BINARY_F(mulf, "mulps", 0x59)
-BINARY_F(divf, "divps", 0x5e)
-UNARY_F(sqrtf, "sqrtps", 0x51)
+BINARY_F(addf, addps, 0x58)
+BINARY_F(subf, subps, 0x5c)
+BINARY_F(mulf, mulps, 0x59)
+BINARY_F(divf, divps, 0x5e)
+UNARY_F(sqrtf, sqrtps, 0x51)
#define UNARY_D(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_660f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[0]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
- orc_sse_emit_660f (p, insn_name, code, \
+ orc_sse_emit_ ## insn_name (p, \
p->vars[insn->src_args[1]].alloc, \
p->vars[insn->dest_args[0]].alloc); \
}
-BINARY_D(addd, "addpd", 0x58)
-BINARY_D(subd, "subpd", 0x5c)
-BINARY_D(muld, "mulpd", 0x59)
-BINARY_D(divd, "divpd", 0x5e)
-UNARY_D(sqrtd, "sqrtpd", 0x51)
+BINARY_D(addd, addpd, 0x58)
+BINARY_D(subd, subpd, 0x5c)
+BINARY_D(muld, mulpd, 0x59)
+BINARY_D(divd, divpd, 0x5e)
+UNARY_D(sqrtd, sqrtpd, 0x51)
static void
sse_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_FAST_NAN) {
- orc_sse_emit_0f (p, "minps", 0x5d,
+ orc_sse_emit_minps (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
} else {
orc_sse_emit_movdqa (p,
p->vars[insn->src_args[1]].alloc,
tmp);
- orc_sse_emit_0f (p, "minps", 0x5d,
+ orc_sse_emit_minps (p,
p->vars[insn->dest_args[0]].alloc,
tmp);
- orc_sse_emit_0f (p, "minps", 0x5d,
+ orc_sse_emit_minps (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
orc_sse_emit_por (p,
sse_rule_mind (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_FAST_NAN) {
- orc_sse_emit_660f (p, "minpd", 0x5d,
+ orc_sse_emit_minpd (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
} else {
orc_sse_emit_movdqa (p,
p->vars[insn->src_args[1]].alloc,
tmp);
- orc_sse_emit_660f (p, "minpd", 0x5d,
+ orc_sse_emit_minpd (p,
p->vars[insn->dest_args[0]].alloc,
tmp);
- orc_sse_emit_660f (p, "minpd", 0x5d,
+ orc_sse_emit_minpd (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
orc_sse_emit_por (p,
sse_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_FAST_NAN) {
- orc_sse_emit_0f (p, "maxps", 0x5f,
+ orc_sse_emit_maxps (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
} else {
orc_sse_emit_movdqa (p,
p->vars[insn->src_args[1]].alloc,
tmp);
- orc_sse_emit_0f (p, "maxps", 0x5f,
+ orc_sse_emit_maxps (p,
p->vars[insn->dest_args[0]].alloc,
tmp);
- orc_sse_emit_0f (p, "maxps", 0x5f,
+ orc_sse_emit_maxps (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
orc_sse_emit_por (p,
sse_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn)
{
if (p->target_flags & ORC_TARGET_FAST_NAN) {
- orc_sse_emit_660f (p, "maxpd", 0x5f,
+ orc_sse_emit_maxpd (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
} else {
orc_sse_emit_movdqa (p,
p->vars[insn->src_args[1]].alloc,
tmp);
- orc_sse_emit_660f (p, "maxpd", 0x5f,
+ orc_sse_emit_maxpd (p,
p->vars[insn->dest_args[0]].alloc,
tmp);
- orc_sse_emit_660f (p, "maxpd", 0x5f,
+ orc_sse_emit_maxpd (p,
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
orc_sse_emit_por (p,
static void
sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_0f (p, "cmpeqps", 0xc2,
+ orc_sse_emit_cmpeqps (p, /* predicate byte 0 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x00;
}
static void
sse_rule_cmpeqd (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_660f (p, "cmpeqpd", 0xc2,
+ orc_sse_emit_cmpeqpd (p, /* predicate byte 0 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x00;
}
static void
sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_0f (p, "cmpltps", 0xc2,
+ orc_sse_emit_cmpltps (p, /* predicate byte 1 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x01;
}
static void
sse_rule_cmpltd (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_660f (p, "cmpltpd", 0xc2,
+ orc_sse_emit_cmpltpd (p, /* predicate byte 1 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x01;
}
static void
sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_0f (p, "cmpleps", 0xc2,
+ orc_sse_emit_cmpleps (p, /* predicate byte 2 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x02;
}
static void
sse_rule_cmpled (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_660f (p, "cmplepd", 0xc2,
+ orc_sse_emit_cmplepd (p, /* predicate byte 2 is now appended from the opcode table (code2) */
p->vars[insn->src_args[1]].alloc,
p->vars[insn->dest_args[0]].alloc);
- *p->codeptr++ = 0x02;
}
tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000);
orc_sse_emit_movdqa (p, src, tmp);
- orc_sse_emit_f30f (p, "cvttps2dq", 0x5b, src, dest);
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_cvttps2dq (p, src, dest);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
orc_sse_emit_pcmpeqd (p, dest, tmpc);
orc_sse_emit_pandn (p, tmpc, tmp);
orc_sse_emit_paddd (p, tmp, dest);
tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000);
orc_sse_emit_pshufd (p, ORC_SSE_SHUF(3,1,3,1), src, tmp);
- orc_sse_emit_660f (p, "cvttpd2dq", 0xe6, src, dest);
- orc_sse_emit_psrad (p, 31, tmp);
+ orc_sse_emit_cvttpd2dq (p, src, dest);
+ orc_sse_emit_psrad_imm (p, 31, tmp);
orc_sse_emit_pcmpeqd (p, dest, tmpc);
orc_sse_emit_pandn (p, tmpc, tmp);
orc_sse_emit_paddd (p, tmp, dest);
static void
sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_0f (p, "cvtdq2ps", 0x5b,
+ orc_sse_emit_cvtdq2ps (p, /* encoding 0x0f5b now comes from orc_x86_opcodes */
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
static void
sse_rule_convld (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_f30f (p, "cvtdq2pd", 0xe6,
+ orc_sse_emit_cvtdq2pd (p, /* encoding 0xf30fe6 now comes from orc_x86_opcodes */
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
static void
sse_rule_convfd (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_0f (p, "cvtps2pd", 0x5a,
+ orc_sse_emit_cvtps2pd (p, /* encoding 0x0f5a now comes from orc_x86_opcodes */
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
static void
sse_rule_convdf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- orc_sse_emit_660f (p, "cvtpd2ps", 0x5a,
+ orc_sse_emit_cvtpd2ps (p, /* encoding 0x660f5a now comes from orc_x86_opcodes */
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
#define _ORC_SSE_H_
#include <orc/orcx86.h>
+#include <orc/orcx86insn.h>
typedef enum {
ORC_TARGET_SSE_SSE2 = (1<<0),
void orc_sse_load_constant (OrcCompiler *compiler, int reg, int size,
orc_uint64 value);
+void orc_sse_emit_sysinsn (OrcCompiler *p, int opcode, int src, int dest); /* opcode: index into orc_x86_opcodes[] (an ORC_X86_* enumerator); src is the immediate count for shift-immediate entries */
unsigned int orc_sse_get_cpu_flags (void);
-/* SSE instructions */
-
-/* SSE2 instructions */
-#define orc_sse_emit_punpcklbw(p,a,b) orc_sse_emit_660f (p, "punpcklbw", 0x60, a, b)
-#define orc_sse_emit_punpcklwd(p,a,b) orc_sse_emit_660f (p, "punpcklwd", 0x61, a, b)
-#define orc_sse_emit_punpckldq(p,a,b) orc_sse_emit_660f (p, "punpckldq", 0x62, a, b)
-#define orc_sse_emit_packsswb(p,a,b) orc_sse_emit_660f (p, "packsswb", 0x63, a, b)
-#define orc_sse_emit_pcmpgtb(p,a,b) orc_sse_emit_660f (p, "pcmpgtb", 0x64, a, b)
-#define orc_sse_emit_pcmpgtw(p,a,b) orc_sse_emit_660f (p, "pcmpgtw", 0x65, a, b)
-#define orc_sse_emit_pcmpgtd(p,a,b) orc_sse_emit_660f (p, "pcmpgtd", 0x66, a, b)
-#define orc_sse_emit_packuswb(p,a,b) orc_sse_emit_660f (p, "packuswb", 0x67, a, b)
-#define orc_sse_emit_punpckhbw(p,a,b) orc_sse_emit_660f (p, "punpckhbw", 0x68, a, b)
-#define orc_sse_emit_punpckhwd(p,a,b) orc_sse_emit_660f (p, "punpckhwd", 0x69, a, b)
-#define orc_sse_emit_punpckhdq(p,a,b) orc_sse_emit_660f (p, "punpckhdq", 0x6a, a, b)
-#define orc_sse_emit_packssdw(p,a,b) orc_sse_emit_660f (p, "packssdw", 0x6b, a, b)
-#define orc_sse_emit_punpcklqdq(p,a,b) orc_sse_emit_660f (p, "punpcklqdq", 0x6c, a, b)
-#define orc_sse_emit_punpckhqdq(p,a,b) orc_sse_emit_660f (p, "punpckhqdq", 0x6d, a, b)
-
-#define orc_sse_emit_movdqa(p,a,b) orc_sse_emit_660f (p, "movdqa", 0x6f, a, b)
-
-#define orc_sse_emit_psraw(p,a,b) orc_sse_emit_shiftimm (p, "psraw", 0x71, 4, a, b)
-#define orc_sse_emit_psrlw(p,a,b) orc_sse_emit_shiftimm (p, "psrlw", 0x71, 2, a, b)
-#define orc_sse_emit_psllw(p,a,b) orc_sse_emit_shiftimm (p, "psllw", 0x71, 6, a, b)
-#define orc_sse_emit_psrad(p,a,b) orc_sse_emit_shiftimm (p, "psrad", 0x72, 4, a, b)
-#define orc_sse_emit_psrld(p,a,b) orc_sse_emit_shiftimm (p, "psrld", 0x72, 2, a, b)
-#define orc_sse_emit_pslld(p,a,b) orc_sse_emit_shiftimm (p, "pslld", 0x72, 6, a, b)
-#define orc_sse_emit_psrlq(p,a,b) orc_sse_emit_shiftimm (p, "psrlq", 0x73, 2, a, b)
-#define orc_sse_emit_psllq(p,a,b) orc_sse_emit_shiftimm (p, "psllq", 0x73, 6, a, b)
-#define orc_sse_emit_psrldq(p,a,b) orc_sse_emit_shiftimm (p, "psrldq", 0x73, 3, a, b)
-#define orc_sse_emit_pslldq(p,a,b) orc_sse_emit_shiftimm (p, "pslldq", 0x73, 7, a, b)
-
-#define orc_sse_emit_psrlq_reg(p,a,b) orc_sse_emit_660f (p, "psrlq", 0xd3, a, b)
-
-#define orc_sse_emit_pcmpeqb(p,a,b) orc_sse_emit_660f (p, "pcmpeqb", 0x74, a, b)
-#define orc_sse_emit_pcmpeqw(p,a,b) orc_sse_emit_660f (p, "pcmpeqw", 0x75, a, b)
-#define orc_sse_emit_pcmpeqd(p,a,b) orc_sse_emit_660f (p, "pcmpeqd", 0x76, a, b)
-
-
-#define orc_sse_emit_paddq(p,a,b) orc_sse_emit_660f (p, "paddq", 0xd4, a, b)
-#define orc_sse_emit_pmullw(p,a,b) orc_sse_emit_660f (p, "pmullw", 0xd5, a, b)
-
-#define orc_sse_emit_psubusb(p,a,b) orc_sse_emit_660f (p, "psubusb", 0xd8, a, b)
-#define orc_sse_emit_psubusw(p,a,b) orc_sse_emit_660f (p, "psubusw", 0xd9, a, b)
-#define orc_sse_emit_pminub(p,a,b) orc_sse_emit_660f (p, "pminub", 0xda, a, b)
-#define orc_sse_emit_pand(p,a,b) orc_sse_emit_660f (p, "pand", 0xdb, a, b)
-#define orc_sse_emit_paddusb(p,a,b) orc_sse_emit_660f (p, "paddusb", 0xdc, a, b)
-#define orc_sse_emit_paddusw(p,a,b) orc_sse_emit_660f (p, "paddusw", 0xdd, a, b)
-#define orc_sse_emit_pmaxub(p,a,b) orc_sse_emit_660f (p, "pmaxub", 0xde, a, b)
-#define orc_sse_emit_pandn(p,a,b) orc_sse_emit_660f (p, "pandn", 0xdf, a, b)
-
-#define orc_sse_emit_pavgb(p,a,b) orc_sse_emit_660f (p, "pavgb", 0xe0, a, b)
-#define orc_sse_emit_pavgw(p,a,b) orc_sse_emit_660f (p, "pavgw", 0xe3, a, b)
-
-#define orc_sse_emit_pmulhuw(p,a,b) orc_sse_emit_660f (p, "pmulhuw", 0xe4, a, b)
-#define orc_sse_emit_pmulhw(p,a,b) orc_sse_emit_660f (p, "pmulhw", 0xe5, a, b)
-
-#define orc_sse_emit_psubsb(p,a,b) orc_sse_emit_660f (p, "psubsb", 0xe8, a, b)
-#define orc_sse_emit_psubsw(p,a,b) orc_sse_emit_660f (p, "psubsw", 0xe9, a, b)
-#define orc_sse_emit_pminsw(p,a,b) orc_sse_emit_660f (p, "pminsw", 0xea, a, b)
-#define orc_sse_emit_por(p,a,b) orc_sse_emit_660f (p, "por", 0xeb, a, b)
-#define orc_sse_emit_paddsb(p,a,b) orc_sse_emit_660f (p, "paddsb", 0xec, a, b)
-#define orc_sse_emit_paddsw(p,a,b) orc_sse_emit_660f (p, "paddsw", 0xed, a, b)
-#define orc_sse_emit_pmaxsw(p,a,b) orc_sse_emit_660f (p, "pmaxsw", 0xee, a, b)
-#define orc_sse_emit_pxor(p,a,b) orc_sse_emit_660f (p, "pxor", 0xef, a, b)
-
-#define orc_sse_emit_pmuludq(p,a,b) orc_sse_emit_660f (p, "pmuludq", 0xf4, a, b)
-#define orc_sse_emit_pmaddwd(p,a,b) orc_sse_emit_660f (p, "pmaddwd", 0xf5, a, b)
-#define orc_sse_emit_psadbw(p,a,b) orc_sse_emit_660f (p, "psadbw", 0xf6, a, b)
-
-#define orc_sse_emit_psubb(p,a,b) orc_sse_emit_660f (p, "psubb", 0xf8, a, b)
-#define orc_sse_emit_psubw(p,a,b) orc_sse_emit_660f (p, "psubw", 0xf9, a, b)
-#define orc_sse_emit_psubd(p,a,b) orc_sse_emit_660f (p, "psubd", 0xfa, a, b)
-#define orc_sse_emit_psubq(p,a,b) orc_sse_emit_660f (p, "psubq", 0xfb, a, b)
-#define orc_sse_emit_paddb(p,a,b) orc_sse_emit_660f (p, "paddb", 0xfc, a, b)
-#define orc_sse_emit_paddw(p,a,b) orc_sse_emit_660f (p, "paddw", 0xfd, a, b)
-#define orc_sse_emit_paddd(p,a,b) orc_sse_emit_660f (p, "paddd", 0xfe, a, b)
-
-/* SSE3 instructions */
-
-/* SSSE3 instructions */
-#define orc_sse_emit_pshufb(p,a,b) orc_sse_emit_660f (p, "pshufb", 0x3800, a, b)
-#define orc_sse_emit_phaddw(p,a,b) orc_sse_emit_660f (p, "phaddw", 0x3801, a, b)
-#define orc_sse_emit_phaddd(p,a,b) orc_sse_emit_660f (p, "phaddd", 0x3802, a, b)
-#define orc_sse_emit_phaddsw(p,a,b) orc_sse_emit_660f (p, "phaddsw", 0x3803, a, b)
-#define orc_sse_emit_pmaddubsw(p,a,b) orc_sse_emit_660f (p, "pmaddubsw", 0x3804, a, b)
-#define orc_sse_emit_phsubw(p,a,b) orc_sse_emit_660f (p, "phsubw", 0x3805, a, b)
-#define orc_sse_emit_phsubd(p,a,b) orc_sse_emit_660f (p, "phsubd", 0x3806, a, b)
-#define orc_sse_emit_phsubsw(p,a,b) orc_sse_emit_660f (p, "phsubsw", 0x3807, a, b)
-#define orc_sse_emit_psignb(p,a,b) orc_sse_emit_660f (p, "psignb", 0x3808, a, b)
-#define orc_sse_emit_psignw(p,a,b) orc_sse_emit_660f (p, "psignw", 0x3809, a, b)
-#define orc_sse_emit_psignd(p,a,b) orc_sse_emit_660f (p, "psignd", 0x380a, a, b)
-#define orc_sse_emit_pmulhrsw(p,a,b) orc_sse_emit_660f (p, "pmulhrsw", 0x380b, a, b)
-
-#define orc_sse_emit_pabsb(p,a,b) orc_sse_emit_660f (p, "pabsb", 0x381c, a, b)
-#define orc_sse_emit_pabsw(p,a,b) orc_sse_emit_660f (p, "pabsw", 0x381d, a, b)
-#define orc_sse_emit_pabsd(p,a,b) orc_sse_emit_660f (p, "pabsd", 0x381e, a, b)
-
-
-/* SSE4.1 instructions */
-#define orc_sse_emit_pmovsxbw(p,a,b) orc_sse_emit_660f (p, "pmovsxbw", 0x3820, a, b)
-#define orc_sse_emit_pmovsxbd(p,a,b) orc_sse_emit_660f (p, "pmovsxbd", 0x3821, a, b)
-#define orc_sse_emit_pmovsxbq(p,a,b) orc_sse_emit_660f (p, "pmovsxbq", 0x3822, a, b)
-#define orc_sse_emit_pmovsxwd(p,a,b) orc_sse_emit_660f (p, "pmovsxwd", 0x3823, a, b)
-#define orc_sse_emit_pmovsxwq(p,a,b) orc_sse_emit_660f (p, "pmovsxwq", 0x3824, a, b)
-#define orc_sse_emit_pmovsxdq(p,a,b) orc_sse_emit_660f (p, "pmovsxdq", 0x3825, a, b)
-
-#define orc_sse_emit_pmuldq(p,a,b) orc_sse_emit_660f (p, "pmuldq", 0x3828, a, b)
-#define orc_sse_emit_pcmpeqq(p,a,b) orc_sse_emit_660f (p, "pcmpeqq", 0x3829, a, b)
-
-#define orc_sse_emit_packusdw(p,a,b) orc_sse_emit_660f (p, "packusdw", 0x382b, a, b)
-
-#define orc_sse_emit_pmovzxbw(p,a,b) orc_sse_emit_660f (p, "pmovzxbw", 0x3830, a, b)
-#define orc_sse_emit_pmovzxbd(p,a,b) orc_sse_emit_660f (p, "pmovzxbd", 0x3831, a, b)
-#define orc_sse_emit_pmovzxbq(p,a,b) orc_sse_emit_660f (p, "pmovzxbq", 0x3832, a, b)
-#define orc_sse_emit_pmovzxwd(p,a,b) orc_sse_emit_660f (p, "pmovzxwd", 0x3833, a, b)
-#define orc_sse_emit_pmovzxwq(p,a,b) orc_sse_emit_660f (p, "pmovzxwq", 0x3834, a, b)
-#define orc_sse_emit_pmovzxdq(p,a,b) orc_sse_emit_660f (p, "pmovzxdq", 0x3835, a, b)
-
-#define orc_sse_emit_pmulld(p,a,b) orc_sse_emit_660f (p, "pmulld", 0x3840, a, b)
-#define orc_sse_emit_phminposuw(p,a,b) orc_sse_emit_660f (p, "phminposuw", 0x3841, a, b)
-
-#define orc_sse_emit_pminsb(p,a,b) orc_sse_emit_660f (p, "pminsb", 0x3838, a, b)
-#define orc_sse_emit_pminsd(p,a,b) orc_sse_emit_660f (p, "pminsd", 0x3839, a, b)
-#define orc_sse_emit_pminuw(p,a,b) orc_sse_emit_660f (p, "pminuw", 0x383a, a, b)
-#define orc_sse_emit_pminud(p,a,b) orc_sse_emit_660f (p, "pminud", 0x383b, a, b)
-#define orc_sse_emit_pmaxsb(p,a,b) orc_sse_emit_660f (p, "pmaxsb", 0x383c, a, b)
-#define orc_sse_emit_pmaxsd(p,a,b) orc_sse_emit_660f (p, "pmaxsd", 0x383d, a, b)
-#define orc_sse_emit_pmaxuw(p,a,b) orc_sse_emit_660f (p, "pmaxuw", 0x383e, a, b)
-#define orc_sse_emit_pmaxud(p,a,b) orc_sse_emit_660f (p, "pmaxud", 0x383f, a, b)
-
-/* SSE4.2 instructions */
-#define orc_sse_emit_pcmpgtq(p,a,b) orc_sse_emit_660f (p, "pcmpgtq", 0x3837, a, b)
#endif
--- /dev/null
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <orc/orc.h>
+#include <orc/orcsysinsn.h>
+
+/*
+ * Opcode table consumed by orc_sse_emit_sysinsn(), which indexes it with
+ * the ORC_X86_* enumerators, so entry order must stay in step with that
+ * enum.  Fields as used by the emitter: name (assembly mnemonic), type
+ * (ORC_X86_INSN_TYPE_*), a third field that is 0 in every entry (its
+ * meaning is not visible here), code (prefix/opcode bytes packed
+ * big-endian into one integer) and, for SD2/SHIFTIMM entries, code2
+ * (trailing predicate byte, or the value placed in the ModRM byte for
+ * shift-immediate forms).
+ */
+OrcSysOpcode orc_x86_opcodes[] = {
+ { "punpcklbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f60 },
+ { "punpcklwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f61 },
+ { "punpckldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f62 },
+ { "packsswb", ORC_X86_INSN_TYPE_SD, 0, 0x660f63 },
+ { "pcmpgtb", ORC_X86_INSN_TYPE_SD, 0, 0x660f64 },
+ { "pcmpgtw", ORC_X86_INSN_TYPE_SD, 0, 0x660f65 },
+ { "pcmpgtd", ORC_X86_INSN_TYPE_SD, 0, 0x660f66 },
+ { "packuswb", ORC_X86_INSN_TYPE_SD, 0, 0x660f67 },
+ { "punpckhbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f68 },
+ { "punpckhwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f69 },
+ { "punpckhdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6a },
+ { "packssdw", ORC_X86_INSN_TYPE_SD, 0, 0x660f6b },
+ { "punpcklqdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6c },
+ { "punpckhqdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6d },
+ { "movdqa", ORC_X86_INSN_TYPE_SD, 0, 0x660f6f },
+ { "psraw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe1 },
+ { "psrlw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd1 },
+ { "psllw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff1 },
+ { "psrad", ORC_X86_INSN_TYPE_SD, 0, 0x660fe2 },
+ { "psrld", ORC_X86_INSN_TYPE_SD, 0, 0x660fd2 },
+ { "pslld", ORC_X86_INSN_TYPE_SD, 0, 0x660ff2 },
+ { "psrlq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd3 },
+ { "psllq", ORC_X86_INSN_TYPE_SD, 0, 0x660ff3 },
+ { "psrldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f73 }, /* NOTE(review): same code as the SHIFTIMM entry below; looks like an index placeholder - the matching emit macro is commented out */
+ { "pslldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f73 }, /* NOTE(review): same code as the SHIFTIMM entry below; looks like an index placeholder - the matching emit macro is commented out */
+ { "psrlq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd3 }, /* slot for ORC_X86_psrlq_reg */
+ { "pcmpeqb", ORC_X86_INSN_TYPE_SD, 0, 0x660f74 },
+ { "pcmpeqw", ORC_X86_INSN_TYPE_SD, 0, 0x660f75 },
+ { "pcmpeqd", ORC_X86_INSN_TYPE_SD, 0, 0x660f76 },
+ { "paddq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd4 },
+ { "pmullw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd5 },
+ { "psubusb", ORC_X86_INSN_TYPE_SD, 0, 0x660fd8 },
+ { "psubusw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd9 },
+ { "pminub", ORC_X86_INSN_TYPE_SD, 0, 0x660fda },
+ { "pand", ORC_X86_INSN_TYPE_SD, 0, 0x660fdb },
+ { "paddusb", ORC_X86_INSN_TYPE_SD, 0, 0x660fdc },
+ { "paddusw", ORC_X86_INSN_TYPE_SD, 0, 0x660fdd },
+ { "pmaxub", ORC_X86_INSN_TYPE_SD, 0, 0x660fde },
+ { "pandn", ORC_X86_INSN_TYPE_SD, 0, 0x660fdf },
+ { "pavgb", ORC_X86_INSN_TYPE_SD, 0, 0x660fe0 },
+ { "pavgw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe3 },
+ { "pmulhuw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe4 },
+ { "pmulhw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe5 },
+ { "psubsb", ORC_X86_INSN_TYPE_SD, 0, 0x660fe8 },
+ { "psubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe9 },
+ { "pminsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fea },
+ { "por", ORC_X86_INSN_TYPE_SD, 0, 0x660feb },
+ { "paddsb", ORC_X86_INSN_TYPE_SD, 0, 0x660fec },
+ { "paddsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fed },
+ { "pmaxsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fee },
+ { "pxor", ORC_X86_INSN_TYPE_SD, 0, 0x660fef },
+ { "pmuludq", ORC_X86_INSN_TYPE_SD, 0, 0x660ff4 },
+ { "pmaddwd", ORC_X86_INSN_TYPE_SD, 0, 0x660ff5 },
+ { "psadbw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff6 },
+ { "psubb", ORC_X86_INSN_TYPE_SD, 0, 0x660ff8 },
+ { "psubw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff9 },
+ { "psubd", ORC_X86_INSN_TYPE_SD, 0, 0x660ffa },
+ { "psubq", ORC_X86_INSN_TYPE_SD, 0, 0x660ffb },
+ { "paddb", ORC_X86_INSN_TYPE_SD, 0, 0x660ffc },
+ { "paddw", ORC_X86_INSN_TYPE_SD, 0, 0x660ffd },
+ { "paddd", ORC_X86_INSN_TYPE_SD, 0, 0x660ffe },
+ { "pshufb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3800 },
+ { "phaddw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3801 },
+ { "phaddd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3802 },
+ { "phaddsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3803 },
+ { "pmaddubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3804 },
+ { "phsubw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3805 },
+ { "phsubd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3806 },
+ { "phsubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3807 },
+ { "psignb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3808 },
+ { "psignw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3809 },
+ { "psignd", ORC_X86_INSN_TYPE_SD, 0, 0x660f380a },
+ { "pmulhrsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f380b },
+ { "pabsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f381c },
+ { "pabsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f381d },
+ { "pabsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f381e },
+ { "pmovsxbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3820 },
+ { "pmovsxbd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3821 },
+ { "pmovsxbq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3822 },
+ { "pmovsxwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3823 },
+ { "pmovsxwq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3824 },
+ { "pmovsxdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3825 },
+ { "pmuldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3828 },
+ { "pcmpeqq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3829 },
+ { "packusdw", ORC_X86_INSN_TYPE_SD, 0, 0x660f382b },
+ { "pmovzxbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3830 },
+ { "pmovzxbd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3831 },
+ { "pmovzxbq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3832 },
+ { "pmovzxwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3833 },
+ { "pmovzxwq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3834 },
+ { "pmovzxdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3835 },
+ { "pmulld", ORC_X86_INSN_TYPE_SD, 0, 0x660f3840 },
+ { "phminposuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3841 },
+ { "pminsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3838 },
+ { "pminsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3839 },
+ { "pminuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f383a },
+ { "pminud", ORC_X86_INSN_TYPE_SD, 0, 0x660f383b },
+ { "pmaxsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f383c },
+ { "pmaxsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f383d },
+ { "pmaxuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f383e },
+ { "pmaxud", ORC_X86_INSN_TYPE_SD, 0, 0x660f383f },
+ { "pcmpgtq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3837 },
+ { "addps", ORC_X86_INSN_TYPE_SD, 0, 0x0f58 },
+ { "subps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5c },
+ { "mulps", ORC_X86_INSN_TYPE_SD, 0, 0x0f59 },
+ { "divps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5e },
+ { "sqrtps", ORC_X86_INSN_TYPE_SD, 0, 0x0f51 },
+ { "addpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+ { "subpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5c },
+ { "mulpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f59 },
+ { "divpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5e },
+ { "sqrtpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f51 },
+ { "cmpeqps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 0 },
+ { "cmpeqpd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 0 },
+ { "cmpltps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 1 },
+ { "cmpltpd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 1 },
+ { "cmpleps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 2 },
+ { "cmplepd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 2 },
+ { "cvttps2dq", ORC_X86_INSN_TYPE_SD, 0, 0xf30f5b },
+ { "cvttpd2dq", ORC_X86_INSN_TYPE_SD, 0, 0x660fe6 },
+ { "cvtdq2ps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5b },
+ { "cvtdq2pd", ORC_X86_INSN_TYPE_SD, 0, 0xf30fe6 },
+ { "cvtps2pd", ORC_X86_INSN_TYPE_SD, 0, 0x0f5a },
+ { "cvtpd2ps", ORC_X86_INSN_TYPE_SD, 0, 0x660f5a },
+ { "minps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5d },
+ { "minpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5d },
+ { "maxps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5f },
+ { "maxpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5f },
+ { "psraw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 4 }, /* first ORC_X86_*_imm slot */
+ { "psrlw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 2 },
+ { "psllw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 6 },
+ { "psrad", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 4 },
+ { "psrld", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 2 },
+ { "pslld", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 6 },
+ { "psrlq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 2 },
+ { "psllq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 6 },
+ { "psrldq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 3 },
+ { "pslldq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 7 },
+
+ //{ "", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+ //{ "", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+ //{ "addps", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+};
+
+
+/*
+ * orc_sse_emit_sysinsn:
+ * @p: compiler whose code buffer (p->codeptr) receives the instruction
+ * @index: position in orc_x86_opcodes[], normally an ORC_X86_* enumerator
+ * @src: source SSE register; for SHIFTIMM entries, the immediate count
+ * @dest: destination SSE register
+ *
+ * Logs the assembly text for one table-driven SSE instruction and appends
+ * its encoding: optional mandatory prefix, REX, opcode bytes, ModRM and,
+ * for SHIFTIMM/SD2 entries, one trailing byte.
+ */
+void
+orc_sse_emit_sysinsn (OrcCompiler *p, int index, int src, int dest)
+{
+  OrcSysOpcode *opcode = orc_x86_opcodes + index;
+  /* Registers handed to orc_x86_emit_rex().  The register forms pair
+   * emit_rex (.., dest, 0, src) with emit_modrm_reg (src, dest). */
+  int rex_reg = dest;
+  int rex_rm = src;
+
+  switch (opcode->type) {
+    case ORC_X86_INSN_TYPE_SD:
+    case ORC_X86_INSN_TYPE_SD2:
+      ORC_ASM_CODE(p," %s %%%s, %%%s\n", opcode->name,
+          orc_x86_get_regname_sse(src),
+          orc_x86_get_regname_sse(dest));
+      break;
+    case ORC_X86_INSN_TYPE_SHIFTIMM:
+      ORC_ASM_CODE(p," %s $%d, %%%s\n", opcode->name,
+          src,
+          orc_x86_get_regname_sse(dest));
+      /* Here src is an immediate, not a register, and dest occupies the
+       * ModRM slot that src has in the register forms.  Feeding the
+       * immediate to the REX computation would set a REX bit whenever
+       * the shift count has bit 3 set (e.g. a shift by 8). */
+      rex_reg = 0;
+      rex_rm = dest;
+      break;
+    default:
+      break;
+  }
+
+  if (opcode->code & 0xff000000) {
+    /* prefix + three opcode bytes, e.g. 66 0f 38 xx */
+    *p->codeptr++ = (opcode->code >> 24) & 0xff;
+    orc_x86_emit_rex (p, 0, rex_reg, 0, rex_rm);
+    *p->codeptr++ = (opcode->code >> 16) & 0xff;
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  } else if (opcode->code & 0xff0000) {
+    /* prefix + two opcode bytes, e.g. 66 0f xx or f3 0f xx */
+    *p->codeptr++ = (opcode->code >> 16) & 0xff;
+    orc_x86_emit_rex (p, 0, rex_reg, 0, rex_rm);
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  } else {
+    /* No mandatory prefix (0f xx): REX has to come before the 0x0f
+     * escape byte, not between the two opcode bytes. */
+    orc_x86_emit_rex (p, 0, rex_reg, 0, rex_rm);
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  }
+
+  switch (opcode->type) {
+    case ORC_X86_INSN_TYPE_SD:
+      orc_x86_emit_modrm_reg (p, src, dest);
+      break;
+    case ORC_X86_INSN_TYPE_SHIFTIMM:
+      /* code2 selects the shift operation; the count follows as imm8 */
+      orc_x86_emit_modrm_reg (p, dest, opcode->code2);
+      *p->codeptr++ = src;
+      break;
+    case ORC_X86_INSN_TYPE_SD2:
+      /* code2 is the trailing byte (comparison predicate for cmp*p[sd]) */
+      orc_x86_emit_modrm_reg (p, src, dest);
+      *p->codeptr++ = opcode->code2;
+      break;
+    default:
+      break;
+  }
+
+}
+
--- /dev/null
+
+#ifndef ORC_ORC_X86_INSN_H_
+#define ORC_ORC_X86_INSN_H_
+
+#define ORC_X86_INSN_TYPE_SD 0 /* two registers: ModRM built from (src, dest), no trailing byte */
+#define ORC_X86_INSN_TYPE_SHIFTIMM 1 /* immediate form: ModRM built from (dest, code2), then src as a trailing byte */
+#define ORC_X86_INSN_TYPE_SD2 2 /* two registers: ModRM built from (src, dest), then code2 as a trailing byte */
+/*
+ * Index of each instruction in orc_x86_opcodes[].  The orc_sse_emit_*
+ * macros pass these values to orc_sse_emit_sysinsn(), which uses them
+ * directly as the array index, so the order here must match the table
+ * entry for entry.
+ */
+enum {
+ ORC_X86_punpcklbw,
+ ORC_X86_punpcklwd,
+ ORC_X86_punpckldq,
+ ORC_X86_packsswb,
+ ORC_X86_pcmpgtb,
+ ORC_X86_pcmpgtw,
+ ORC_X86_pcmpgtd,
+ ORC_X86_packuswb,
+ ORC_X86_punpckhbw,
+ ORC_X86_punpckhwd,
+ ORC_X86_punpckhdq,
+ ORC_X86_packssdw,
+ ORC_X86_punpcklqdq,
+ ORC_X86_punpckhqdq,
+ ORC_X86_movdqa,
+ ORC_X86_psraw,
+ ORC_X86_psrlw,
+ ORC_X86_psllw,
+ ORC_X86_psrad,
+ ORC_X86_psrld,
+ ORC_X86_pslld,
+ ORC_X86_psrlq,
+ ORC_X86_psllq,
+ ORC_X86_psrldq,
+ ORC_X86_pslldq,
+ ORC_X86_psrlq_reg, /* table entry is named "psrlq" and carries the same code as ORC_X86_psrlq */
+ ORC_X86_pcmpeqb,
+ ORC_X86_pcmpeqw,
+ ORC_X86_pcmpeqd,
+ ORC_X86_paddq,
+ ORC_X86_pmullw,
+ ORC_X86_psubusb,
+ ORC_X86_psubusw,
+ ORC_X86_pminub,
+ ORC_X86_pand,
+ ORC_X86_paddusb,
+ ORC_X86_paddusw,
+ ORC_X86_pmaxub,
+ ORC_X86_pandn,
+ ORC_X86_pavgb,
+ ORC_X86_pavgw,
+ ORC_X86_pmulhuw,
+ ORC_X86_pmulhw,
+ ORC_X86_psubsb,
+ ORC_X86_psubsw,
+ ORC_X86_pminsw,
+ ORC_X86_por,
+ ORC_X86_paddsb,
+ ORC_X86_paddsw,
+ ORC_X86_pmaxsw,
+ ORC_X86_pxor,
+ ORC_X86_pmuludq,
+ ORC_X86_pmaddwd,
+ ORC_X86_psadbw,
+ ORC_X86_psubb,
+ ORC_X86_psubw,
+ ORC_X86_psubd,
+ ORC_X86_psubq,
+ ORC_X86_paddb,
+ ORC_X86_paddw,
+ ORC_X86_paddd,
+ ORC_X86_pshufb,
+ ORC_X86_phaddw,
+ ORC_X86_phaddd,
+ ORC_X86_phaddsw,
+ ORC_X86_pmaddubsw,
+ ORC_X86_phsubw,
+ ORC_X86_phsubd,
+ ORC_X86_phsubsw,
+ ORC_X86_psignb,
+ ORC_X86_psignw,
+ ORC_X86_psignd,
+ ORC_X86_pmulhrsw,
+ ORC_X86_pabsb,
+ ORC_X86_pabsw,
+ ORC_X86_pabsd,
+ ORC_X86_pmovsxbw,
+ ORC_X86_pmovsxbd,
+ ORC_X86_pmovsxbq,
+ ORC_X86_pmovsxwd,
+ ORC_X86_pmovsxwq,
+ ORC_X86_pmovsxdq,
+ ORC_X86_pmuldq,
+ ORC_X86_pcmpeqq,
+ ORC_X86_packusdw,
+ ORC_X86_pmovzxbw,
+ ORC_X86_pmovzxbd,
+ ORC_X86_pmovzxbq,
+ ORC_X86_pmovzxwd,
+ ORC_X86_pmovzxwq,
+ ORC_X86_pmovzxdq,
+ ORC_X86_pmulld,
+ ORC_X86_phminposuw,
+ ORC_X86_pminsb,
+ ORC_X86_pminsd,
+ ORC_X86_pminuw,
+ ORC_X86_pminud,
+ ORC_X86_pmaxsb,
+ ORC_X86_pmaxsd,
+ ORC_X86_pmaxuw,
+ ORC_X86_pmaxud,
+ ORC_X86_pcmpgtq,
+ ORC_X86_addps,
+ ORC_X86_subps,
+ ORC_X86_mulps,
+ ORC_X86_divps,
+ ORC_X86_sqrtps,
+ ORC_X86_addpd,
+ ORC_X86_subpd,
+ ORC_X86_mulpd,
+ ORC_X86_divpd,
+ ORC_X86_sqrtpd,
+ ORC_X86_cmpeqps,
+ ORC_X86_cmpeqpd,
+ ORC_X86_cmpltps,
+ ORC_X86_cmpltpd,
+ ORC_X86_cmpleps,
+ ORC_X86_cmplepd,
+ ORC_X86_cvttps2dq,
+ ORC_X86_cvttpd2dq,
+ ORC_X86_cvtdq2ps,
+ ORC_X86_cvtdq2pd,
+ ORC_X86_cvtps2pd,
+ ORC_X86_cvtpd2ps,
+ ORC_X86_minps,
+ ORC_X86_minpd,
+ ORC_X86_maxps,
+ ORC_X86_maxpd,
+ ORC_X86_psraw_imm, /* from here on: ORC_X86_INSN_TYPE_SHIFTIMM entries */
+ ORC_X86_psrlw_imm,
+ ORC_X86_psllw_imm,
+ ORC_X86_psrad_imm,
+ ORC_X86_psrld_imm,
+ ORC_X86_pslld_imm,
+ ORC_X86_psrlq_imm,
+ ORC_X86_psllq_imm,
+ ORC_X86_psrldq_imm,
+ ORC_X86_pslldq_imm,
+};
+
+
+
+#define orc_sse_emit_punpcklbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklbw, a, b)
+#define orc_sse_emit_punpcklwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklwd, a, b)
+#define orc_sse_emit_punpckldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckldq, a, b)
+#define orc_sse_emit_packsswb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packsswb, a, b)
+#define orc_sse_emit_pcmpgtb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtb, a, b)
+#define orc_sse_emit_pcmpgtw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtw, a, b)
+#define orc_sse_emit_pcmpgtd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtd, a, b)
+#define orc_sse_emit_packuswb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packuswb, a, b)
+#define orc_sse_emit_punpckhbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhbw, a, b)
+#define orc_sse_emit_punpckhwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhwd, a, b)
+#define orc_sse_emit_punpckhdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhdq, a, b)
+#define orc_sse_emit_packssdw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packssdw, a, b)
+#define orc_sse_emit_punpcklqdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklqdq, a, b)
+#define orc_sse_emit_punpckhqdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhqdq, a, b)
+#define orc_sse_emit_movdqa(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_movdqa, a, b)
+//#define orc_sse_emit_psraw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psraw, a, b)
+//#define orc_sse_emit_psrlw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlw, a, b)
+//#define orc_sse_emit_psllw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllw, a, b)
+//#define orc_sse_emit_psrad(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrad, a, b)
+//#define orc_sse_emit_psrld(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrld, a, b)
+//#define orc_sse_emit_pslld(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslld, a, b)
+//#define orc_sse_emit_psrlq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlq, a, b)
+//#define orc_sse_emit_psllq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllq, a, b)
+//#define orc_sse_emit_psrldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrldq, a, b)
+//#define orc_sse_emit_pslldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslldq, a, b)
+#define orc_sse_emit_psrlq_reg(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlq_reg, a, b)
+#define orc_sse_emit_pcmpeqb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqb, a, b)
+#define orc_sse_emit_pcmpeqw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqw, a, b)
+#define orc_sse_emit_pcmpeqd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqd, a, b)
+#define orc_sse_emit_paddq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddq, a, b)
+#define orc_sse_emit_pmullw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmullw, a, b)
+#define orc_sse_emit_psubusb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubusb, a, b)
+#define orc_sse_emit_psubusw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubusw, a, b)
+#define orc_sse_emit_pminub(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminub, a, b)
+#define orc_sse_emit_pand(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pand, a, b)
+#define orc_sse_emit_paddusb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddusb, a, b)
+#define orc_sse_emit_paddusw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddusw, a, b)
+#define orc_sse_emit_pmaxub(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxub, a, b)
+#define orc_sse_emit_pandn(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pandn, a, b)
+#define orc_sse_emit_pavgb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pavgb, a, b)
+#define orc_sse_emit_pavgw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pavgw, a, b)
+#define orc_sse_emit_pmulhuw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhuw, a, b)
+#define orc_sse_emit_pmulhw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhw, a, b)
+#define orc_sse_emit_psubsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubsb, a, b)
+#define orc_sse_emit_psubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubsw, a, b)
+#define orc_sse_emit_pminsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminsw, a, b)
+#define orc_sse_emit_por(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_por, a, b)
+#define orc_sse_emit_paddsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddsb, a, b)
+#define orc_sse_emit_paddsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddsw, a, b)
+#define orc_sse_emit_pmaxsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxsw, a, b)
+#define orc_sse_emit_pxor(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pxor, a, b)
+#define orc_sse_emit_pmuludq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmuludq, a, b)
+#define orc_sse_emit_pmaddwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaddwd, a, b)
+#define orc_sse_emit_psadbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psadbw, a, b)
+#define orc_sse_emit_psubb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubb, a, b)
+#define orc_sse_emit_psubw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubw, a, b)
+#define orc_sse_emit_psubd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubd, a, b)
+#define orc_sse_emit_psubq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubq, a, b)
+#define orc_sse_emit_paddb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddb, a, b)
+#define orc_sse_emit_paddw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddw, a, b)
+#define orc_sse_emit_paddd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddd, a, b)
+#define orc_sse_emit_pshufb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pshufb, a, b)
+#define orc_sse_emit_phaddw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddw, a, b)
+#define orc_sse_emit_phaddd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddd, a, b)
+#define orc_sse_emit_phaddsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddsw, a, b)
+#define orc_sse_emit_pmaddubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaddubsw, a, b)
+#define orc_sse_emit_phsubw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubw, a, b)
+#define orc_sse_emit_phsubd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubd, a, b)
+#define orc_sse_emit_phsubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubsw, a, b)
+#define orc_sse_emit_psignb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignb, a, b)
+#define orc_sse_emit_psignw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignw, a, b)
+#define orc_sse_emit_psignd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignd, a, b)
+#define orc_sse_emit_pmulhrsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhrsw, a, b)
+#define orc_sse_emit_pabsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pabsb, a, b)
+#define orc_sse_emit_pabsw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pabsw, src, dest) /* packed-integer emitters; (src, dest) order presumed from sibling emitters' call sites -- confirm */
+#define orc_sse_emit_pabsd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pabsd, src, dest)
+#define orc_sse_emit_pmovsxbw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxbw, src, dest)
+#define orc_sse_emit_pmovsxbd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxbd, src, dest)
+#define orc_sse_emit_pmovsxbq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxbq, src, dest)
+#define orc_sse_emit_pmovsxwd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxwd, src, dest)
+#define orc_sse_emit_pmovsxwq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxwq, src, dest)
+#define orc_sse_emit_pmovsxdq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovsxdq, src, dest)
+#define orc_sse_emit_pmuldq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmuldq, src, dest)
+#define orc_sse_emit_pcmpeqq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pcmpeqq, src, dest)
+#define orc_sse_emit_packusdw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_packusdw, src, dest)
+#define orc_sse_emit_pmovzxbw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxbw, src, dest)
+#define orc_sse_emit_pmovzxbd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxbd, src, dest)
+#define orc_sse_emit_pmovzxbq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxbq, src, dest)
+#define orc_sse_emit_pmovzxwd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxwd, src, dest)
+#define orc_sse_emit_pmovzxwq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxwq, src, dest)
+#define orc_sse_emit_pmovzxdq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmovzxdq, src, dest)
+#define orc_sse_emit_pmulld(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmulld, src, dest)
+#define orc_sse_emit_phminposuw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_phminposuw, src, dest)
+#define orc_sse_emit_pminsb(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pminsb, src, dest)
+#define orc_sse_emit_pminsd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pminsd, src, dest)
+#define orc_sse_emit_pminuw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pminuw, src, dest)
+#define orc_sse_emit_pminud(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pminud, src, dest)
+#define orc_sse_emit_pmaxsb(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmaxsb, src, dest)
+#define orc_sse_emit_pmaxsd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmaxsd, src, dest)
+#define orc_sse_emit_pmaxuw(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmaxuw, src, dest)
+#define orc_sse_emit_pmaxud(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pmaxud, src, dest)
+#define orc_sse_emit_pcmpgtq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_pcmpgtq, src, dest)
+#define orc_sse_emit_addps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_addps, src, dest) /* packed-float emitters; (src, dest) order presumed from sibling emitters' call sites -- confirm */
+#define orc_sse_emit_subps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_subps, src, dest)
+#define orc_sse_emit_mulps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_mulps, src, dest)
+#define orc_sse_emit_divps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_divps, src, dest)
+#define orc_sse_emit_sqrtps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_sqrtps, src, dest)
+#define orc_sse_emit_addpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_addpd, src, dest)
+#define orc_sse_emit_subpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_subpd, src, dest)
+#define orc_sse_emit_mulpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_mulpd, src, dest)
+#define orc_sse_emit_divpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_divpd, src, dest)
+#define orc_sse_emit_sqrtpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_sqrtpd, src, dest)
+#define orc_sse_emit_cmpeqps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmpeqps, src, dest)
+#define orc_sse_emit_cmpeqpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmpeqpd, src, dest)
+#define orc_sse_emit_cmpltps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmpltps, src, dest)
+#define orc_sse_emit_cmpltpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmpltpd, src, dest)
+#define orc_sse_emit_cmpleps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmpleps, src, dest)
+#define orc_sse_emit_cmplepd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cmplepd, src, dest)
+#define orc_sse_emit_cvttps2dq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvttps2dq, src, dest)
+#define orc_sse_emit_cvttpd2dq(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvttpd2dq, src, dest)
+#define orc_sse_emit_cvtdq2ps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvtdq2ps, src, dest)
+#define orc_sse_emit_cvtdq2pd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvtdq2pd, src, dest)
+#define orc_sse_emit_cvtps2pd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvtps2pd, src, dest)
+#define orc_sse_emit_cvtpd2ps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_cvtpd2ps, src, dest)
+#define orc_sse_emit_minps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_minps, src, dest)
+#define orc_sse_emit_minpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_minpd, src, dest)
+#define orc_sse_emit_maxps(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_maxps, src, dest)
+#define orc_sse_emit_maxpd(compiler,src,dest) orc_sse_emit_sysinsn(compiler, ORC_X86_maxpd, src, dest)
+#define orc_sse_emit_psraw_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psraw_imm, imm, reg) /* immediate-count shifts: callers pass (compiler, shift count, register) */
+#define orc_sse_emit_psrlw_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psrlw_imm, imm, reg)
+#define orc_sse_emit_psllw_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psllw_imm, imm, reg)
+#define orc_sse_emit_psrad_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psrad_imm, imm, reg)
+#define orc_sse_emit_psrld_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psrld_imm, imm, reg)
+#define orc_sse_emit_pslld_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_pslld_imm, imm, reg)
+#define orc_sse_emit_psrlq_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psrlq_imm, imm, reg)
+#define orc_sse_emit_psllq_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psllq_imm, imm, reg)
+#define orc_sse_emit_psrldq_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_psrldq_imm, imm, reg)
+#define orc_sse_emit_pslldq_imm(compiler,imm,reg) orc_sse_emit_sysinsn(compiler, ORC_X86_pslldq_imm, imm, reg)
+
+#endif
+