From 3936ea757082b8772cb3a56dd289751ac4de9205 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Wed, 27 Oct 2010 17:14:48 +0100 Subject: [PATCH] sse: convert to using orc_sse_emit_sysinsn --- examples/volscale.c | 2 +- orc/Makefile.am | 5 +- orc/orcprogram-sse.c | 8 +- orc/orcrules-sse.c | 541 +++++++++++++++++++++++++-------------------------- orc/orcsse.h | 134 +------------ orc/orcsysinsn.h | 35 ++++ orc/orcx86insn.c | 207 ++++++++++++++++++++ orc/orcx86insn.h | 292 +++++++++++++++++++++++++++ 8 files changed, 814 insertions(+), 410 deletions(-) create mode 100644 orc/orcsysinsn.h create mode 100644 orc/orcx86insn.c create mode 100644 orc/orcx86insn.h diff --git a/examples/volscale.c b/examples/volscale.c index c8dac72..a5952fa 100644 --- a/examples/volscale.c +++ b/examples/volscale.c @@ -55,7 +55,7 @@ sse_rule_mulhslw (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src2, tmp2); orc_sse_emit_pmulhw (p, src1, src2); /* .. | 0 | vl*p0 | */ orc_sse_emit_paddw (p, tmp1, src2); /* .. | 0 | vl*p0 | + sign correct */ - orc_sse_emit_psrld (p, 16, dest); /* .. | 0 | vh | */ + orc_sse_emit_psrld_imm (p, 16, dest); /* .. | 0 | vh | */ orc_sse_emit_pmaddwd (p, tmp2, dest); /* .. | p0 * vh | */ orc_sse_emit_paddd (p, src2, dest); /* .. 
| p0 * v0 | */ } diff --git a/orc/Makefile.am b/orc/Makefile.am index 32b9252..3f0acc0 100644 --- a/orc/Makefile.am +++ b/orc/Makefile.am @@ -30,7 +30,7 @@ liborc_@ORC_MAJORMINOR@_la_SOURCES = \ if ENABLE_BACKEND_SSE liborc_@ORC_MAJORMINOR@_la_SOURCES += orcsse.c orcrules-sse.c orcprogram-sse.c -liborc_@ORC_MAJORMINOR@_la_SOURCES += orcx86.c +liborc_@ORC_MAJORMINOR@_la_SOURCES += orcx86.c orcx86insn.c endif if ENABLE_BACKEND_MMX liborc_@ORC_MAJORMINOR@_la_SOURCES += orcmmx.c orcrules-mmx.c orcprogram-mmx.c @@ -86,7 +86,8 @@ pkginclude_HEADERS = \ orcneon.h \ orcx86.h \ orcpowerpc.h \ - orcarm.h + orcarm.h \ + orcx86insn.h nodist_pkginclude_HEADERS = orc-stdint.h diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 21c980c..e0dcaad 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -402,13 +402,13 @@ orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 valu v = (0xffffffff<<i); if (value == v) { orc_sse_emit_pcmpeqb (compiler, reg, reg); - orc_sse_emit_pslld (compiler, i, reg); + orc_sse_emit_pslld_imm (compiler, i, reg); return; } v = (0xffffffff>>i); if (value == v) { orc_sse_emit_pcmpeqb (compiler, reg, reg); - orc_sse_emit_psrld (compiler, i, reg); + orc_sse_emit_psrld_imm (compiler, i, reg); return; } } @@ -417,13 +417,13 @@ orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 valu v = (0xffff & (0xffff<<i)) | (0xffff0000 & (0xffff0000<<i)); if (value == v) { orc_sse_emit_pcmpeqb (compiler, reg, reg); - orc_sse_emit_psllw (compiler, i, reg); + orc_sse_emit_psllw_imm (compiler, i, reg); return; } v = (0xffff & (0xffff>>i)) | (0xffff0000 & (0xffff0000>>i)); if (value == v) { orc_sse_emit_pcmpeqb (compiler, reg, reg); - orc_sse_emit_psrlw (compiler, i, reg); + orc_sse_emit_psrlw_imm (compiler, i, reg); return; } } diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 2b35983..a631114 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -212,7 +212,7 @@ sse_rule_loadupib (OrcCompiler *compiler, void *user, OrcInstruction *insn) case 2: orc_sse_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc); orc_sse_emit_movdqa (compiler, dest->alloc, tmp); - orc_sse_emit_psrlw (compiler, 8, tmp); + orc_sse_emit_psrlw_imm (compiler, 8, tmp); break; case 4: orc_sse_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc); @@ -394,15 +394,15 @@ sse_rule_ldresnearl
(OrcCompiler *compiler, void *user, OrcInstruction *insn) #if 0 orc_sse_emit_movdqa (compiler, X86_XMM6, tmp); - orc_sse_emit_pslld (compiler, 10, tmp); - orc_sse_emit_psrld (compiler, 26, tmp); - orc_sse_emit_pslld (compiler, 2, tmp); + orc_sse_emit_pslld_imm (compiler, 10, tmp); + orc_sse_emit_psrld_imm (compiler, 26, tmp); + orc_sse_emit_pslld_imm (compiler, 2, tmp); orc_sse_emit_movdqa (compiler, tmp, tmp2); - orc_sse_emit_pslld (compiler, 8, tmp2); + orc_sse_emit_pslld_imm (compiler, 8, tmp2); orc_sse_emit_por (compiler, tmp2, tmp); orc_sse_emit_movdqa (compiler, tmp, tmp2); - orc_sse_emit_pslld (compiler, 16, tmp2); + orc_sse_emit_pslld_imm (compiler, 16, tmp2); orc_sse_emit_por (compiler, tmp2, tmp); #else orc_sse_emit_movdqa (compiler, X86_XMM6, tmp); @@ -421,7 +421,7 @@ sse_rule_ldresnearl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_sse_emit_pshufb (compiler, tmp, dest->alloc); orc_sse_emit_movdqa (compiler, X86_XMM7, tmp); - orc_sse_emit_pslld (compiler, compiler->loop_shift, tmp); + orc_sse_emit_pslld_imm (compiler, compiler->loop_shift, tmp); orc_sse_emit_paddd (compiler, tmp, X86_XMM6); @@ -447,10 +447,10 @@ sse_rule_ldresnearl (OrcCompiler *compiler, void *user, OrcInstruction *insn) src->ptr_register, compiler->gp_tmpreg, 2, tmp, FALSE); #ifdef MMX //orc_mmx_emit_punpckldq (compiler, tmp, dest->alloc); - orc_sse_emit_psllq (compiler, 8*4*i, tmp); + orc_sse_emit_psllq_imm (compiler, 8*4*i, tmp); orc_sse_emit_por (compiler, tmp, dest->alloc); #else - orc_sse_emit_pslldq (compiler, 4*i, tmp); + orc_sse_emit_pslldq_imm (compiler, 4*i, tmp); orc_sse_emit_por (compiler, tmp, dest->alloc); #endif } @@ -500,9 +500,9 @@ sse_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_x86_emit_mov_reg_sse (compiler, src->ptr_offset, tmp); orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(0,0,0,0), tmp, tmp); - orc_sse_emit_psrlw (compiler, 8, tmp); + orc_sse_emit_psrlw_imm (compiler, 8, tmp); orc_sse_emit_pmullw (compiler, tmp2, 
tmp); - orc_sse_emit_psraw (compiler, 8, tmp); + orc_sse_emit_psraw_imm (compiler, 8, tmp); orc_sse_emit_pxor (compiler, tmp2, tmp2); orc_sse_emit_packsswb (compiler, tmp2, tmp); @@ -581,14 +581,14 @@ sse_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(1,1,0,0), tmp4, tmp4); orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,1,0,0), tmp4, tmp4); #endif - orc_sse_emit_psrlw (compiler, 8, tmp4); + orc_sse_emit_psrlw_imm (compiler, 8, tmp4); orc_sse_emit_pmullw (compiler, tmp4, tmp2); - orc_sse_emit_psraw (compiler, 8, tmp2); + orc_sse_emit_psraw_imm (compiler, 8, tmp2); orc_sse_emit_pxor (compiler, tmp, tmp); orc_sse_emit_packsswb (compiler, tmp, tmp2); if (i != 0) { - orc_sse_emit_pslldq (compiler, 8, tmp2); + orc_sse_emit_pslldq_imm (compiler, 8, tmp2); } orc_sse_emit_paddb (compiler, tmp2, dest->alloc); @@ -639,9 +639,9 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp); orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp); - orc_mmx_emit_psrlw (compiler, 8, tmp); + orc_mmx_emit_psrlw_imm (compiler, 8, tmp); orc_mmx_emit_pmullw (compiler, tmp2, tmp); - orc_mmx_emit_psraw (compiler, 8, tmp); + orc_mmx_emit_psraw_imm (compiler, 8, tmp); orc_mmx_emit_pxor (compiler, tmp2, tmp2); orc_mmx_emit_packsswb (compiler, tmp2, tmp); @@ -653,7 +653,7 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_x86_emit_mov_memoffset_mmx (compiler, 4, 0, src->ptr_register, tmp2, FALSE); orc_mmx_emit_paddb (compiler, tmp, tmp2); - orc_mmx_emit_psllq (compiler, 32, tmp2); + orc_mmx_emit_psllq_imm (compiler, 32, tmp2); orc_mmx_emit_por (compiler, tmp2, dest->alloc); } @@ -695,7 +695,7 @@ sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn) static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_660f (p, insn_name, code, \ + orc_sse_emit_ 
## insn_name (p, \ p->vars[insn->src_args[0]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } @@ -704,91 +704,91 @@ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_660f (p, insn_name, code, \ + orc_sse_emit_ ## insn_name (p, \ p->vars[insn->src_args[1]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } -UNARY(absb,"pabsb",0x381c) -BINARY(addb,"paddb",0xfc) -BINARY(addssb,"paddsb",0xec) -BINARY(addusb,"paddusb",0xdc) -BINARY(andb,"pand",0xdb) -BINARY(andnb,"pandn",0xdf) -BINARY(avgub,"pavgb",0xe0) -BINARY(cmpeqb,"pcmpeqb",0x74) -BINARY(cmpgtsb,"pcmpgtb",0x64) -BINARY(maxsb,"pmaxsb",0x383c) -BINARY(maxub,"pmaxub",0xde) -BINARY(minsb,"pminsb",0x3838) -BINARY(minub,"pminub",0xda) -//BINARY(mullb,"pmullb",0xd5) -//BINARY(mulhsb,"pmulhb",0xe5) -//BINARY(mulhub,"pmulhub",0xe4) -BINARY(orb,"por",0xeb) -//UNARY(signb,"psignb",0x3808) -BINARY(subb,"psubb",0xf8) -BINARY(subssb,"psubsb",0xe8) -BINARY(subusb,"psubusb",0xd8) -BINARY(xorb,"pxor",0xef) - -UNARY(absw,"pabsw",0x381d) -BINARY(addw,"paddw",0xfd) -BINARY(addssw,"paddsw",0xed) -BINARY(addusw,"paddusw",0xdd) -BINARY(andw,"pand",0xdb) -BINARY(andnw,"pandn",0xdf) -BINARY(avguw,"pavgw",0xe3) -BINARY(cmpeqw,"pcmpeqw",0x75) -BINARY(cmpgtsw,"pcmpgtw",0x65) -BINARY(maxsw,"pmaxsw",0xee) -BINARY(maxuw,"pmaxuw",0x383e) -BINARY(minsw,"pminsw",0xea) -BINARY(minuw,"pminuw",0x383a) -BINARY(mullw,"pmullw",0xd5) -BINARY(mulhsw,"pmulhw",0xe5) -BINARY(mulhuw,"pmulhuw",0xe4) -BINARY(orw,"por",0xeb) -//UNARY(signw,"psignw",0x3809) -BINARY(subw,"psubw",0xf9) -BINARY(subssw,"psubsw",0xe9) -BINARY(subusw,"psubusw",0xd9) -BINARY(xorw,"pxor",0xef) - -UNARY(absl,"pabsd",0x381e) -BINARY(addl,"paddd",0xfe) -//BINARY(addssl,"paddsd",0xed) -//BINARY(addusl,"paddusd",0xdd) -BINARY(andl,"pand",0xdb) -BINARY(andnl,"pandn",0xdf) -//BINARY(avgul,"pavgd",0xe3) -BINARY(cmpeql,"pcmpeqd",0x76) -BINARY(cmpgtsl,"pcmpgtd",0x66) 
-BINARY(maxsl,"pmaxsd",0x383d) -BINARY(maxul,"pmaxud",0x383f) -BINARY(minsl,"pminsd",0x3839) -BINARY(minul,"pminud",0x383b) -BINARY(mulll,"pmulld",0x3840) -//BINARY(mulhsl,"pmulhd",0xe5) -//BINARY(mulhul,"pmulhud",0xe4) -BINARY(orl,"por",0xeb) -//UNARY(signl,"psignd",0x380a) -BINARY(subl,"psubd",0xfa) -//BINARY(subssl,"psubsd",0xe9) -//BINARY(subusl,"psubusd",0xd9) -BINARY(xorl,"pxor",0xef) - -BINARY(andq,"pand",0xdb) -BINARY(andnq,"pandn",0xdf) -BINARY(orq,"por",0xeb) -BINARY(xorq,"pxor",0xef) -BINARY(cmpeqq,"pcmpeqq",0x3829) -BINARY(cmpgtsq,"pcmpgtq",0x3837) +UNARY(absb,pabsb,0x381c) +BINARY(addb,paddb,0xfc) +BINARY(addssb,paddsb,0xec) +BINARY(addusb,paddusb,0xdc) +BINARY(andb,pand,0xdb) +BINARY(andnb,pandn,0xdf) +BINARY(avgub,pavgb,0xe0) +BINARY(cmpeqb,pcmpeqb,0x74) +BINARY(cmpgtsb,pcmpgtb,0x64) +BINARY(maxsb,pmaxsb,0x383c) +BINARY(maxub,pmaxub,0xde) +BINARY(minsb,pminsb,0x3838) +BINARY(minub,pminub,0xda) +//BINARY(mullb,pmullb,0xd5) +//BINARY(mulhsb,pmulhb,0xe5) +//BINARY(mulhub,pmulhub,0xe4) +BINARY(orb,por,0xeb) +//UNARY(signb,psignb,0x3808) +BINARY(subb,psubb,0xf8) +BINARY(subssb,psubsb,0xe8) +BINARY(subusb,psubusb,0xd8) +BINARY(xorb,pxor,0xef) + +UNARY(absw,pabsw,0x381d) +BINARY(addw,paddw,0xfd) +BINARY(addssw,paddsw,0xed) +BINARY(addusw,paddusw,0xdd) +BINARY(andw,pand,0xdb) +BINARY(andnw,pandn,0xdf) +BINARY(avguw,pavgw,0xe3) +BINARY(cmpeqw,pcmpeqw,0x75) +BINARY(cmpgtsw,pcmpgtw,0x65) +BINARY(maxsw,pmaxsw,0xee) +BINARY(maxuw,pmaxuw,0x383e) +BINARY(minsw,pminsw,0xea) +BINARY(minuw,pminuw,0x383a) +BINARY(mullw,pmullw,0xd5) +BINARY(mulhsw,pmulhw,0xe5) +BINARY(mulhuw,pmulhuw,0xe4) +BINARY(orw,por,0xeb) +//UNARY(signw,psignw,0x3809) +BINARY(subw,psubw,0xf9) +BINARY(subssw,psubsw,0xe9) +BINARY(subusw,psubusw,0xd9) +BINARY(xorw,pxor,0xef) + +UNARY(absl,pabsd,0x381e) +BINARY(addl,paddd,0xfe) +//BINARY(addssl,paddsd,0xed) +//BINARY(addusl,paddusd,0xdd) +BINARY(andl,pand,0xdb) +BINARY(andnl,pandn,0xdf) +//BINARY(avgul,pavgd,0xe3) +BINARY(cmpeql,pcmpeqd,0x76) 
+BINARY(cmpgtsl,pcmpgtd,0x66) +BINARY(maxsl,pmaxsd,0x383d) +BINARY(maxul,pmaxud,0x383f) +BINARY(minsl,pminsd,0x3839) +BINARY(minul,pminud,0x383b) +BINARY(mulll,pmulld,0x3840) +//BINARY(mulhsl,pmulhd,0xe5) +//BINARY(mulhul,pmulhud,0xe4) +BINARY(orl,por,0xeb) +//UNARY(signl,psignd,0x380a) +BINARY(subl,psubd,0xfa) +//BINARY(subssl,psubsd,0xe9) +//BINARY(subusl,psubusd,0xd9) +BINARY(xorl,pxor,0xef) + +BINARY(andq,pand,0xdb) +BINARY(andnq,pandn,0xdf) +BINARY(orq,por,0xeb) +BINARY(xorq,pxor,0xef) +BINARY(cmpeqq,pcmpeqq,0x3829) +BINARY(cmpgtsq,pcmpgtq,0x3837) #ifndef MMX -BINARY(addq,"paddq",0xd4) -BINARY(subq,"psubq",0xfb) +BINARY(addq,paddq,0xd4) +BINARY(subq,psubq,0xfb) #endif static void @@ -808,7 +808,7 @@ sse_rule_accl (OrcCompiler *p, void *user, OrcInstruction *insn) #ifndef MMX if (p->loop_shift == 0) { - orc_sse_emit_pslldq (p, 12, src); + orc_sse_emit_pslldq_imm (p, 12, src); } #endif orc_sse_emit_paddd (p, src, dest); @@ -826,14 +826,14 @@ sse_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn) #ifndef MMX if (p->loop_shift <= 2) { orc_sse_emit_movdqa (p, src1, tmp); - orc_sse_emit_pslldq (p, 16 - (1<<p->loop_shift), tmp); + orc_sse_emit_pslldq_imm (p, 16 - (1<<p->loop_shift), tmp); orc_sse_emit_movdqa (p, src2, tmp2); - orc_sse_emit_pslldq (p, 16 - (1<<p->loop_shift), tmp2); + orc_sse_emit_pslldq_imm (p, 16 - (1<<p->loop_shift), tmp2); orc_sse_emit_psadbw (p, tmp2, tmp); } else if (p->loop_shift == 3) { orc_sse_emit_movdqa (p, src1, tmp); orc_sse_emit_psadbw (p, src2, tmp); - orc_sse_emit_pslldq (p, 8, tmp); + orc_sse_emit_pslldq_imm (p, 8, tmp); } else { orc_sse_emit_movdqa (p, src1, tmp); orc_sse_emit_psadbw (p, src2, tmp); @@ -841,9 +841,9 @@ sse_rule_accsadubl (OrcCompiler *p, void *user, OrcInstruction *insn) #else if (p->loop_shift <= 2) { orc_sse_emit_movdqa (p, src1, tmp); - orc_sse_emit_psllq (p, 8*(8 - (1<<p->loop_shift)), tmp); + orc_sse_emit_psllq_imm (p, 8*(8 - (1<<p->loop_shift)), tmp); orc_sse_emit_movdqa (p, src2, tmp2); - orc_sse_emit_psllq (p, 8*(8 -
(1<<p->loop_shift)), tmp2); + orc_sse_emit_psllq_imm (p, 8*(8 - (1<<p->loop_shift)), tmp2); orc_sse_emit_psadbw (p, tmp2, tmp); } else { orc_sse_emit_movdqa (p, src1, tmp); @@ -858,19 +858,18 @@ sse_rule_signX_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) { int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - const char * names[] = { "psignb", "psignw", "psignd" }; - int codes[] = { 0x3808, 0x3809, 0x380a }; + int opcodes[] = { ORC_X86_psignb, ORC_X86_psignw, ORC_X86_psignd }; int type = ORC_PTR_TO_INT(user); int tmpc; tmpc = orc_compiler_get_temp_constant (p, 1<vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_sse_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type], + orc_sse_emit_sysinsn (p, opcodes_imm[type], p->vars[insn->src_args[1]].value.i, p->vars[insn->dest_args[0]].alloc); } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { @@ -962,7 +967,7 @@ sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]), p->exec_reg, tmp, FALSE); - orc_sse_emit_660f (p, code[type], reg_code[type], tmp, + orc_sse_emit_sysinsn (p, opcodes[type], tmp, p->vars[insn->dest_args[0]].alloc); } else { ORC_COMPILER_ERROR(p,"rule only works with constants or params"); @@ -977,7 +982,7 @@ sse_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_sse_emit_psllw (p, p->vars[insn->src_args[1]].value.i, dest); + orc_sse_emit_psllw_imm (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, 0xff&(0xff<<p->vars[insn->src_args[1]].value.i)); orc_sse_emit_pand (p, tmp, dest); @@ -996,12 +1001,12 @@ sse_rule_shrsb (OrcCompiler *p, void *user, OrcInstruction *insn) if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psraw (p,
p->vars[insn->src_args[1]].value.i, tmp); - orc_sse_emit_psrlw (p, 8, tmp); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psraw_imm (p, p->vars[insn->src_args[1]].value.i, tmp); + orc_sse_emit_psrlw_imm (p, 8, tmp); - orc_sse_emit_psraw (p, 8 + p->vars[insn->src_args[1]].value.i, dest); - orc_sse_emit_psllw (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8 + p->vars[insn->src_args[1]].value.i, dest); + orc_sse_emit_psllw_imm (p, 8, dest); orc_sse_emit_por (p, tmp, dest); } else { @@ -1017,7 +1022,7 @@ sse_rule_shrub (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_sse_emit_psrlw (p, p->vars[insn->src_args[1]].value.i, dest); + orc_sse_emit_psrlw_imm (p, p->vars[insn->src_args[1]].value.i, dest); tmp = orc_compiler_get_constant (p, 1, (0xff>>p->vars[insn->src_args[1]].value.i)); orc_sse_emit_pand (p, tmp, dest); @@ -1040,10 +1045,10 @@ sse_rule_shrsq (OrcCompiler *p, void *user, OrcInstruction *insn) #else orc_mmx_emit_pshufw (p, ORC_MMX_SHUF(3,2,3,2), src, tmp); #endif - orc_sse_emit_psrad (p, 31, tmp); - orc_sse_emit_psllq (p, 64-p->vars[insn->src_args[1]].value.i, tmp); + orc_sse_emit_psrad_imm (p, 31, tmp); + orc_sse_emit_psllq_imm (p, 64-p->vars[insn->src_args[1]].value.i, tmp); - orc_sse_emit_psrlq (p, p->vars[insn->src_args[1]].value.i, dest); + orc_sse_emit_psrlq_imm (p, p->vars[insn->src_args[1]].value.i, dest); orc_sse_emit_por (p, tmp, dest); } else { ORC_COMPILER_ERROR(p,"rule only works with constants"); @@ -1058,7 +1063,7 @@ sse_rule_convsbw (OrcCompiler *p, void *user, OrcInstruction *insn) int dest = p->vars[insn->dest_args[0]].alloc; orc_sse_emit_punpcklbw (p, src, dest); - orc_sse_emit_psraw (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8, dest); } static void @@ -1071,7 +1076,7 @@ sse_rule_convubw (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME need a zero register */ if (0) { orc_sse_emit_punpcklbw (p, src, dest); - orc_sse_emit_psrlw (p, 8, dest); + 
orc_sse_emit_psrlw_imm (p, 8, dest); } else { orc_sse_emit_pxor(p, tmp, tmp); orc_sse_emit_punpcklbw (p, tmp, dest); @@ -1105,10 +1110,10 @@ sse_rule_convuuswb (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_movdqa (p, src, dest); - orc_sse_emit_psrlw (p, 15, tmp); - orc_sse_emit_psllw (p, 14, tmp); + orc_sse_emit_psrlw_imm (p, 15, tmp); + orc_sse_emit_psllw_imm (p, 14, tmp); orc_sse_emit_por (p, tmp, dest); - orc_sse_emit_psllw (p, 1, tmp); + orc_sse_emit_psllw_imm (p, 1, tmp); orc_sse_emit_pxor (p, tmp, dest); orc_sse_emit_packuswb (p, dest, dest); } @@ -1118,8 +1123,8 @@ sse_rule_convwb (OrcCompiler *p, void *user, OrcInstruction *insn) { int dest = p->vars[insn->dest_args[0]].alloc; - orc_sse_emit_psllw (p, 8, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_packuswb (p, dest, dest); } @@ -1128,7 +1133,7 @@ sse_rule_convhwb (OrcCompiler *p, void *user, OrcInstruction *insn) { int dest = p->vars[insn->dest_args[0]].alloc; - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_packuswb (p, dest, dest); } @@ -1139,7 +1144,7 @@ sse_rule_convswl (OrcCompiler *p, void *user, OrcInstruction *insn) int dest = p->vars[insn->dest_args[0]].alloc; orc_sse_emit_punpcklwd (p, src, dest); - orc_sse_emit_psrad (p, 16, dest); + orc_sse_emit_psrad_imm (p, 16, dest); } static void @@ -1152,7 +1157,7 @@ sse_rule_convuwl (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME need a zero register */ if (0) { orc_sse_emit_punpcklwd (p, src, dest); - orc_sse_emit_psrld (p, 16, dest); + orc_sse_emit_psrld_imm (p, 16, dest); } else { orc_sse_emit_pxor(p, tmp, tmp); orc_sse_emit_punpcklwd (p, tmp, dest); @@ -1164,8 +1169,8 @@ sse_rule_convlw (OrcCompiler *p, void *user, OrcInstruction *insn) { int dest = p->vars[insn->dest_args[0]].alloc; - orc_sse_emit_pslld (p, 16, dest); - orc_sse_emit_psrad (p, 16, dest); + 
orc_sse_emit_pslld_imm (p, 16, dest); + orc_sse_emit_psrad_imm (p, 16, dest); orc_sse_emit_packssdw (p, dest, dest); } @@ -1174,7 +1179,7 @@ sse_rule_convhlw (OrcCompiler *p, void *user, OrcInstruction *insn) { int dest = p->vars[insn->dest_args[0]].alloc; - orc_sse_emit_psrad (p, 16, dest); + orc_sse_emit_psrad_imm (p, 16, dest); orc_sse_emit_packssdw (p, dest, dest); } @@ -1204,7 +1209,7 @@ sse_rule_convslq (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_psrad_imm (p, 31, tmp); orc_sse_emit_punpckldq (p, tmp, dest); } @@ -1271,9 +1276,9 @@ sse_rule_div255w (OrcCompiler *p, void *user, OrcInstruction *insn) tmpc = orc_compiler_get_constant (p, 2, 0x0080); orc_sse_emit_paddw (p, tmpc, dest); orc_sse_emit_movdqa (p, dest, tmp); - orc_sse_emit_psrlw (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, tmp); orc_sse_emit_paddw (p, tmp, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); } #if 1 @@ -1292,13 +1297,13 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) int i; orc_sse_emit_movdqa (p, src, divisor); - orc_sse_emit_psllw (p, 8, divisor); - orc_sse_emit_psrlw (p, 1, divisor); + orc_sse_emit_psllw_imm (p, 8, divisor); + orc_sse_emit_psrlw_imm (p, 1, divisor); orc_sse_load_constant (p, a, 2, 0x00ff); tmp = orc_compiler_get_constant (p, 2, 0x8000); orc_sse_emit_movdqa (p, tmp, j); - orc_sse_emit_psrlw (p, 8, j); + orc_sse_emit_psrlw_imm (p, 8, j); orc_sse_emit_pxor (p, tmp, dest); @@ -1309,11 +1314,11 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, l, j2); orc_sse_emit_pandn (p, divisor, l); orc_sse_emit_psubw (p, l, dest); - orc_sse_emit_psrlw (p, 1, divisor); + orc_sse_emit_psrlw_imm (p, 1, divisor); orc_sse_emit_pand (p, j, j2); orc_sse_emit_pxor (p, j2, a); - orc_sse_emit_psrlw (p, 1, j); + orc_sse_emit_psrlw_imm (p, 1, j); } orc_sse_emit_movdqa 
(p, divisor, l); @@ -1347,7 +1352,7 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_pxor (p, a, a); orc_sse_emit_movdqa (p, tmp, j); - orc_sse_emit_psrlw (p, 8, j); + orc_sse_emit_psrlw_imm (p, 8, j); for(i=0;i<8;i++){ orc_sse_emit_por (p, j, a); @@ -1357,7 +1362,7 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_pcmpgtw (p, b, k); orc_sse_emit_pand (p, j, k); orc_sse_emit_pxor (p, k, a); - orc_sse_emit_psrlw (p, 1, j); + orc_sse_emit_psrlw_imm (p, 1, j); } orc_sse_emit_movdqa (p, a, dest); @@ -1372,9 +1377,9 @@ sse_rule_mulsbw (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_punpcklbw (p, src, tmp); - orc_sse_emit_psraw (p, 8, tmp); + orc_sse_emit_psraw_imm (p, 8, tmp); orc_sse_emit_punpcklbw (p, dest, dest); - orc_sse_emit_psraw (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8, dest); orc_sse_emit_pmullw (p, tmp, dest); } @@ -1386,9 +1391,9 @@ sse_rule_mulubw (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_punpcklbw (p, src, tmp); - orc_sse_emit_psrlw (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, tmp); orc_sse_emit_punpcklbw (p, dest, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_pmullw (p, tmp, dest); } @@ -1403,14 +1408,14 @@ sse_rule_mullb (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, dest, tmp); orc_sse_emit_pmullw (p, src, dest); - orc_sse_emit_psllw (p, 8, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_movdqa (p, src, tmp2); - orc_sse_emit_psraw (p, 8, tmp2); - orc_sse_emit_psraw (p, 8, tmp); + orc_sse_emit_psraw_imm (p, 8, tmp2); + orc_sse_emit_psraw_imm (p, 8, tmp); orc_sse_emit_pmullw (p, tmp2, tmp); - orc_sse_emit_psllw (p, 8, tmp); + orc_sse_emit_psllw_imm (p, 8, tmp); orc_sse_emit_por (p, tmp, dest); } @@ -1425,21 
+1430,21 @@ sse_rule_mulhsb (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_movdqa (p, dest, tmp2); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psraw (p, 8, tmp); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psraw_imm (p, 8, tmp); - orc_sse_emit_psllw (p, 8, dest); - orc_sse_emit_psraw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8, dest); orc_sse_emit_pmullw (p, tmp, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psraw (p, 8, tmp); - orc_sse_emit_psraw (p, 8, tmp2); + orc_sse_emit_psraw_imm (p, 8, tmp); + orc_sse_emit_psraw_imm (p, 8, tmp2); orc_sse_emit_pmullw (p, tmp, tmp2); - orc_sse_emit_psrlw (p, 8, tmp2); - orc_sse_emit_psllw (p, 8, tmp2); + orc_sse_emit_psrlw_imm (p, 8, tmp2); + orc_sse_emit_psllw_imm (p, 8, tmp2); orc_sse_emit_por (p, tmp2, dest); } @@ -1453,21 +1458,21 @@ sse_rule_mulhub (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_movdqa (p, dest, tmp2); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psrlw (p, 8, tmp); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, tmp); - orc_sse_emit_psllw (p, 8, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_pmullw (p, tmp, dest); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psrlw (p, 8, tmp); - orc_sse_emit_psrlw (p, 8, tmp2); + orc_sse_emit_psrlw_imm (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, tmp2); orc_sse_emit_pmullw (p, tmp, tmp2); - orc_sse_emit_psrlw (p, 8, tmp2); - orc_sse_emit_psllw (p, 8, tmp2); + orc_sse_emit_psrlw_imm (p, 8, tmp2); + orc_sse_emit_psllw_imm (p, 8, tmp2); orc_sse_emit_por (p, tmp2, dest); } @@ -1628,8 +1633,8 @@ sse_rule_select0lw (OrcCompiler *p, void *user, 
OrcInstruction *insn) /* FIXME slow */ /* same as convlw */ - orc_sse_emit_pslld (p, 16, dest); - orc_sse_emit_psrad (p, 16, dest); + orc_sse_emit_pslld_imm (p, 16, dest); + orc_sse_emit_psrad_imm (p, 16, dest); orc_sse_emit_packssdw (p, dest, dest); } @@ -1641,7 +1646,7 @@ sse_rule_select1lw (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME slow */ - orc_sse_emit_psrad (p, 16, dest); + orc_sse_emit_psrad_imm (p, 16, dest); orc_sse_emit_packssdw (p, dest, dest); } @@ -1665,7 +1670,7 @@ sse_rule_select1ql (OrcCompiler *p, void *user, OrcInstruction *insn) int src = p->vars[insn->src_args[0]].alloc; int dest = p->vars[insn->dest_args[0]].alloc; - orc_sse_emit_psrlq (p, 32, dest); + orc_sse_emit_psrlq_imm (p, 32, dest); #ifndef MMX orc_sse_emit_pshufd (p, ORC_SSE_SHUF(2,0,2,0), src, dest); #else @@ -1682,8 +1687,8 @@ sse_rule_select0wb (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME slow */ /* same as convwb */ - orc_sse_emit_psllw (p, 8, dest); - orc_sse_emit_psraw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8, dest); orc_sse_emit_packsswb (p, dest, dest); } @@ -1695,7 +1700,7 @@ sse_rule_select1wb (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME slow */ - orc_sse_emit_psraw (p, 8, dest); + orc_sse_emit_psraw_imm (p, 8, dest); orc_sse_emit_packsswb (p, dest, dest); } @@ -1724,14 +1729,14 @@ sse_rule_splitlw (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME slow */ - orc_sse_emit_psrad (p, 16, dest1); + orc_sse_emit_psrad_imm (p, 16, dest1); orc_sse_emit_packssdw (p, dest1, dest1); if (dest2 != src) { orc_sse_emit_movdqa (p, src, dest2); } - orc_sse_emit_pslld (p, 16, dest2); - orc_sse_emit_psrad (p, 16, dest2); + orc_sse_emit_pslld_imm (p, 16, dest2); + orc_sse_emit_psrad_imm (p, 16, dest2); orc_sse_emit_packssdw (p, dest2, dest2); } @@ -1746,7 +1751,7 @@ sse_rule_splitwb (OrcCompiler *p, void *user, OrcInstruction *insn) /* FIXME slow */ - orc_sse_emit_psraw (p, 8, dest1); + 
orc_sse_emit_psraw_imm (p, 8, dest1); orc_sse_emit_packsswb (p, dest1, dest1); if (dest2 != src) { @@ -1754,8 +1759,8 @@ sse_rule_splitwb (OrcCompiler *p, void *user, OrcInstruction *insn) } #if 0 - orc_sse_emit_psllw (p, 8, dest2); - orc_sse_emit_psraw (p, 8, dest2); + orc_sse_emit_psllw_imm (p, 8, dest2); + orc_sse_emit_psraw_imm (p, 8, dest2); orc_sse_emit_packsswb (p, dest2, dest2); #else orc_sse_emit_pand (p, tmp, dest2); @@ -1798,8 +1803,8 @@ sse_rule_swapw (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_por (p, tmp, dest); } @@ -1811,12 +1816,12 @@ sse_rule_swapl (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_pslld (p, 16, tmp); - orc_sse_emit_psrld (p, 16, dest); + orc_sse_emit_pslld_imm (p, 16, tmp); + orc_sse_emit_psrld_imm (p, 16, dest); orc_sse_emit_por (p, tmp, dest); orc_sse_emit_movdqa (p, dest, tmp); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_por (p, tmp, dest); } @@ -1828,8 +1833,8 @@ sse_rule_swapwl (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_pslld (p, 16, tmp); - orc_sse_emit_psrld (p, 16, dest); + orc_sse_emit_pslld_imm (p, 16, tmp); + orc_sse_emit_psrld_imm (p, 16, dest); orc_sse_emit_por (p, tmp, dest); } @@ -1841,16 +1846,16 @@ sse_rule_swapq (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_psllq (p, 32, tmp); - orc_sse_emit_psrlq (p, 32, dest); + orc_sse_emit_psllq_imm (p, 32, tmp); + orc_sse_emit_psrlq_imm (p, 
32, dest); orc_sse_emit_por (p, tmp, dest); orc_sse_emit_movdqa (p, dest, tmp); - orc_sse_emit_pslld (p, 16, tmp); - orc_sse_emit_psrld (p, 16, dest); + orc_sse_emit_pslld_imm (p, 16, tmp); + orc_sse_emit_psrld_imm (p, 16, dest); orc_sse_emit_por (p, tmp, dest); orc_sse_emit_movdqa (p, dest, tmp); - orc_sse_emit_psllw (p, 8, tmp); - orc_sse_emit_psrlw (p, 8, dest); + orc_sse_emit_psllw_imm (p, 8, tmp); + orc_sse_emit_psrlw_imm (p, 8, dest); orc_sse_emit_por (p, tmp, dest); } @@ -2164,7 +2169,7 @@ sse_rule_avgsl (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, dest, tmp); orc_sse_emit_pxor(p, src, tmp); - orc_sse_emit_psrad(p, 1, tmp); + orc_sse_emit_psrad_imm(p, 1, tmp); orc_sse_emit_por(p, src, dest); orc_sse_emit_psubd(p, tmp, dest); @@ -2181,7 +2186,7 @@ sse_rule_avgul (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, dest, tmp); orc_sse_emit_pxor(p, src, tmp); - orc_sse_emit_psrld(p, 1, tmp); + orc_sse_emit_psrld_imm(p, 1, tmp); orc_sse_emit_por(p, src, dest); orc_sse_emit_psubd(p, tmp, dest); @@ -2202,18 +2207,18 @@ sse_rule_addssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src, tmp2); orc_sse_emit_pxor (p, dest, tmp2); - orc_sse_emit_psrad (p, 1, tmp2); + orc_sse_emit_psrad_imm (p, 1, tmp2); orc_sse_emit_paddd (p, tmp2, tmp); orc_sse_emit_psrad (p, 30, tmp); orc_sse_emit_pslld (p, 30, tmp); orc_sse_emit_movdqa (p, tmp, tmp2); - orc_sse_emit_pslld (p, 1, tmp2); + orc_sse_emit_pslld_imm (p, 1, tmp2); orc_sse_emit_movdqa (p, tmp, tmp3); orc_sse_emit_pxor (p, tmp2, tmp3); - orc_sse_emit_psrad (p, 31, tmp3); + orc_sse_emit_psrad_imm (p, 31, tmp3); - orc_sse_emit_psrad (p, 31, tmp2); + orc_sse_emit_psrad_imm (p, 31, tmp2); tmp = orc_compiler_get_constant (p, 4, 0x80000000); orc_sse_emit_pxor (p, tmp, tmp2); // clamped value orc_sse_emit_pand (p, tmp3, tmp2); @@ -2258,8 +2263,8 @@ sse_rule_addssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_pxor (p, tmp, 
t); orc_sse_emit_por (p, t, s); orc_sse_emit_movdqa (p, src, t); - orc_sse_emit_psrad (p, 31, s); - orc_sse_emit_psrad (p, 31, t); + orc_sse_emit_psrad_imm (p, 31, s); + orc_sse_emit_psrad_imm (p, 31, t); orc_sse_emit_pand (p, s, dest); tmp = orc_compiler_get_constant (p, 4, 0x7fffffff); orc_sse_emit_pxor (p, tmp, t); @@ -2282,18 +2287,18 @@ sse_rule_subssl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_por (p, dest, tmp); orc_sse_emit_pxor (p, dest, tmp2); - orc_sse_emit_psrad (p, 1, tmp2); + orc_sse_emit_psrad_imm (p, 1, tmp2); orc_sse_emit_psubd (p, tmp2, tmp); - orc_sse_emit_psrad (p, 30, tmp); - orc_sse_emit_pslld (p, 30, tmp); + orc_sse_emit_psrad_imm (p, 30, tmp); + orc_sse_emit_pslld_imm (p, 30, tmp); orc_sse_emit_movdqa (p, tmp, tmp2); - orc_sse_emit_pslld (p, 1, tmp2); + orc_sse_emit_pslld_imm (p, 1, tmp2); orc_sse_emit_movdqa (p, tmp, tmp3); orc_sse_emit_pxor (p, tmp2, tmp3); - orc_sse_emit_psrad (p, 31, tmp3); // tmp3 is mask: ~0 is for clamping + orc_sse_emit_psrad_imm (p, 31, tmp3); // tmp3 is mask: ~0 is for clamping - orc_sse_emit_psrad (p, 31, tmp2); + orc_sse_emit_psrad_imm (p, 31, tmp2); tmp = orc_compiler_get_constant (p, 4, 0x80000000); orc_sse_emit_pxor (p, tmp, tmp2); // clamped value orc_sse_emit_pand (p, tmp3, tmp2); @@ -2319,21 +2324,21 @@ sse_rule_addusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) /* Compute the bit that gets carried from bit 0 to bit 1 */ orc_sse_emit_movdqa (p, src, tmp); orc_sse_emit_pand (p, dest, tmp); - orc_sse_emit_pslld (p, 31, tmp); - orc_sse_emit_psrld (p, 31, tmp); + orc_sse_emit_pslld_imm (p, 31, tmp); + orc_sse_emit_psrld_imm (p, 31, tmp); /* Add in (src>>1) */ orc_sse_emit_movdqa (p, src, tmp2); - orc_sse_emit_psrld (p, 1, tmp2); + orc_sse_emit_psrld_imm (p, 1, tmp2); orc_sse_emit_paddd (p, tmp2, tmp); /* Add in (dest>>1) */ orc_sse_emit_movdqa (p, dest, tmp2); - orc_sse_emit_psrld (p, 1, tmp2); + orc_sse_emit_psrld_imm (p, 1, tmp2); orc_sse_emit_paddd (p, tmp2, tmp); /* 
turn overflow bit into mask */ - orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_psrad_imm (p, 31, tmp); /* compute the sum, then or over the mask */ orc_sse_emit_paddd (p, src, dest); @@ -2345,10 +2350,10 @@ sse_rule_addusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, src, tmp2); orc_sse_emit_pxor (p, dest, tmp2); - orc_sse_emit_psrld (p, 1, tmp2); + orc_sse_emit_psrld_imm (p, 1, tmp2); orc_sse_emit_paddd (p, tmp2, tmp); - orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_psrad_imm (p, 31, tmp); orc_sse_emit_paddd (p, src, dest); orc_sse_emit_por (p, tmp, dest); } @@ -2362,14 +2367,14 @@ sse_rule_subusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) int tmp2 = orc_compiler_get_temp_reg (p); orc_sse_emit_movdqa (p, src, tmp2); - orc_sse_emit_psrld (p, 1, tmp2); + orc_sse_emit_psrld_imm (p, 1, tmp2); orc_sse_emit_movdqa (p, dest, tmp); - orc_sse_emit_psrld (p, 1, tmp); + orc_sse_emit_psrld_imm (p, 1, tmp); orc_sse_emit_psubd (p, tmp, tmp2); /* turn overflow bit into mask */ - orc_sse_emit_psrad (p, 31, tmp2); + orc_sse_emit_psrad_imm (p, 31, tmp2); /* compute the difference, then and over the mask */ orc_sse_emit_psubd (p, src, dest); @@ -2384,7 +2389,7 @@ sse_rule_subusl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_0f (p, insn_name, code, \ + orc_sse_emit_ ## insn_name (p, \ p->vars[insn->src_args[0]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } @@ -2393,22 +2398,22 @@ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_0f (p, insn_name, code, \ + orc_sse_emit_ ## insn_name (p, \ p->vars[insn->src_args[1]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } -BINARY_F(addf, "addps", 0x58) -BINARY_F(subf, "subps", 0x5c) -BINARY_F(mulf, "mulps", 0x59) -BINARY_F(divf, "divps", 0x5e) 
-UNARY_F(sqrtf, "sqrtps", 0x51) +BINARY_F(addf, addps, 0x58) +BINARY_F(subf, subps, 0x5c) +BINARY_F(mulf, mulps, 0x59) +BINARY_F(divf, divps, 0x5e) +UNARY_F(sqrtf, sqrtps, 0x51) #define UNARY_D(opcode,insn_name,code) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_660f (p, insn_name, code, \ + orc_sse_emit_ ## insn_name (p, \ p->vars[insn->src_args[0]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } @@ -2417,22 +2422,22 @@ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ static void \ sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \ { \ - orc_sse_emit_660f (p, insn_name, code, \ + orc_sse_emit_ ## insn_name (p, \ p->vars[insn->src_args[1]].alloc, \ p->vars[insn->dest_args[0]].alloc); \ } -BINARY_D(addd, "addpd", 0x58) -BINARY_D(subd, "subpd", 0x5c) -BINARY_D(muld, "mulpd", 0x59) -BINARY_D(divd, "divpd", 0x5e) -UNARY_D(sqrtd, "sqrtpd", 0x51) +BINARY_D(addd, addpd, 0x58) +BINARY_D(subd, subpd, 0x5c) +BINARY_D(muld, mulpd, 0x59) +BINARY_D(divd, divpd, 0x5e) +UNARY_D(sqrtd, sqrtpd, 0x51) static void sse_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_FAST_NAN) { - orc_sse_emit_0f (p, "minps", 0x5d, + orc_sse_emit_minps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); } else { @@ -2440,10 +2445,10 @@ sse_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, p->vars[insn->src_args[1]].alloc, tmp); - orc_sse_emit_0f (p, "minps", 0x5d, + orc_sse_emit_minps (p, p->vars[insn->dest_args[0]].alloc, tmp); - orc_sse_emit_0f (p, "minps", 0x5d, + orc_sse_emit_minps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); orc_sse_emit_por (p, @@ -2456,7 +2461,7 @@ static void sse_rule_mind (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_FAST_NAN) { - orc_sse_emit_660f (p, "minpd", 0x5d, + orc_sse_emit_minpd (p, 
p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); } else { @@ -2464,10 +2469,10 @@ sse_rule_mind (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, p->vars[insn->src_args[1]].alloc, tmp); - orc_sse_emit_660f (p, "minpd", 0x5d, + orc_sse_emit_minpd (p, p->vars[insn->dest_args[0]].alloc, tmp); - orc_sse_emit_660f (p, "minpd", 0x5d, + orc_sse_emit_minpd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); orc_sse_emit_por (p, @@ -2480,7 +2485,7 @@ static void sse_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_FAST_NAN) { - orc_sse_emit_0f (p, "maxps", 0x5f, + orc_sse_emit_maxps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); } else { @@ -2488,10 +2493,10 @@ sse_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, p->vars[insn->src_args[1]].alloc, tmp); - orc_sse_emit_0f (p, "maxps", 0x5f, + orc_sse_emit_maxps (p, p->vars[insn->dest_args[0]].alloc, tmp); - orc_sse_emit_0f (p, "maxps", 0x5f, + orc_sse_emit_maxps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); orc_sse_emit_por (p, @@ -2504,7 +2509,7 @@ static void sse_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn) { if (p->target_flags & ORC_TARGET_FAST_NAN) { - orc_sse_emit_660f (p, "maxpd", 0x5f, + orc_sse_emit_maxpd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); } else { @@ -2512,10 +2517,10 @@ sse_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_movdqa (p, p->vars[insn->src_args[1]].alloc, tmp); - orc_sse_emit_660f (p, "maxpd", 0x5f, + orc_sse_emit_maxpd (p, p->vars[insn->dest_args[0]].alloc, tmp); - orc_sse_emit_660f (p, "maxpd", 0x5f, + orc_sse_emit_maxpd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); orc_sse_emit_por (p, @@ -2527,57 +2532,51 @@ sse_rule_maxd (OrcCompiler *p, void *user, OrcInstruction *insn) static void 
sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_0f (p, "cmpeqps", 0xc2, + orc_sse_emit_cmpeqps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x00; } static void sse_rule_cmpeqd (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_660f (p, "cmpeqpd", 0xc2, + orc_sse_emit_cmpeqpd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x00; } static void sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_0f (p, "cmpltps", 0xc2, + orc_sse_emit_cmpltps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x01; } static void sse_rule_cmpltd (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_660f (p, "cmpltpd", 0xc2, + orc_sse_emit_cmpltpd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x01; } static void sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_0f (p, "cmpleps", 0xc2, + orc_sse_emit_cmpleps (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x02; } static void sse_rule_cmpled (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_660f (p, "cmplepd", 0xc2, + orc_sse_emit_cmplepd (p, p->vars[insn->src_args[1]].alloc, p->vars[insn->dest_args[0]].alloc); - *p->codeptr++ = 0x02; } @@ -2591,8 +2590,8 @@ sse_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn) tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000); orc_sse_emit_movdqa (p, src, tmp); - orc_sse_emit_f30f (p, "cvttps2dq", 0x5b, src, dest); - orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_cvttps2dq (p, src, dest); + orc_sse_emit_psrad_imm (p, 31, tmp); orc_sse_emit_pcmpeqd (p, dest, tmpc); orc_sse_emit_pandn (p, tmpc, tmp); orc_sse_emit_paddd (p, tmp, dest); @@ -2609,8 +2608,8 @@ sse_rule_convdl (OrcCompiler *p, void *user, OrcInstruction 
*insn) tmpc = orc_compiler_get_temp_constant (p, 4, 0x80000000); orc_sse_emit_pshufd (p, ORC_SSE_SHUF(3,1,3,1), src, tmp); - orc_sse_emit_660f (p, "cvttpd2dq", 0xe6, src, dest); - orc_sse_emit_psrad (p, 31, tmp); + orc_sse_emit_cvttpd2dq (p, src, dest); + orc_sse_emit_psrad_imm (p, 31, tmp); orc_sse_emit_pcmpeqd (p, dest, tmpc); orc_sse_emit_pandn (p, tmpc, tmp); orc_sse_emit_paddd (p, tmp, dest); @@ -2619,7 +2618,7 @@ sse_rule_convdl (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_0f (p, "cvtdq2ps", 0x5b, + orc_sse_emit_cvtdq2ps (p, p->vars[insn->src_args[0]].alloc, p->vars[insn->dest_args[0]].alloc); } @@ -2627,7 +2626,7 @@ sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_convld (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_f30f (p, "cvtdq2pd", 0xe6, + orc_sse_emit_cvtdq2pd (p, p->vars[insn->src_args[0]].alloc, p->vars[insn->dest_args[0]].alloc); } @@ -2635,7 +2634,7 @@ sse_rule_convld (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_convfd (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_0f (p, "cvtps2pd", 0x5a, + orc_sse_emit_cvtps2pd (p, p->vars[insn->src_args[0]].alloc, p->vars[insn->dest_args[0]].alloc); } @@ -2643,7 +2642,7 @@ sse_rule_convfd (OrcCompiler *p, void *user, OrcInstruction *insn) static void sse_rule_convdf (OrcCompiler *p, void *user, OrcInstruction *insn) { - orc_sse_emit_660f (p, "cvtpd2ps", 0x5a, + orc_sse_emit_cvtpd2ps (p, p->vars[insn->src_args[0]].alloc, p->vars[insn->dest_args[0]].alloc); } diff --git a/orc/orcsse.h b/orc/orcsse.h index 6b58e7f..cd13e01 100644 --- a/orc/orcsse.h +++ b/orc/orcsse.h @@ -3,6 +3,7 @@ #define _ORC_SSE_H_ #include +#include typedef enum { ORC_TARGET_SSE_SSE2 = (1<<0), @@ -81,141 +82,10 @@ void orc_sse_restore_mxcsr (OrcCompiler *compiler); void orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, 
orc_uint64 value); +void orc_sse_emit_sysinsn (OrcCompiler *p, int opcode, int src, int dest); unsigned int orc_sse_get_cpu_flags (void); -/* SSE instructions */ - -/* SSE2 instructions */ -#define orc_sse_emit_punpcklbw(p,a,b) orc_sse_emit_660f (p, "punpcklbw", 0x60, a, b) -#define orc_sse_emit_punpcklwd(p,a,b) orc_sse_emit_660f (p, "punpcklwd", 0x61, a, b) -#define orc_sse_emit_punpckldq(p,a,b) orc_sse_emit_660f (p, "punpckldq", 0x62, a, b) -#define orc_sse_emit_packsswb(p,a,b) orc_sse_emit_660f (p, "packsswb", 0x63, a, b) -#define orc_sse_emit_pcmpgtb(p,a,b) orc_sse_emit_660f (p, "pcmpgtb", 0x64, a, b) -#define orc_sse_emit_pcmpgtw(p,a,b) orc_sse_emit_660f (p, "pcmpgtw", 0x65, a, b) -#define orc_sse_emit_pcmpgtd(p,a,b) orc_sse_emit_660f (p, "pcmpgtd", 0x66, a, b) -#define orc_sse_emit_packuswb(p,a,b) orc_sse_emit_660f (p, "packuswb", 0x67, a, b) -#define orc_sse_emit_punpckhbw(p,a,b) orc_sse_emit_660f (p, "punpckhbw", 0x68, a, b) -#define orc_sse_emit_punpckhwd(p,a,b) orc_sse_emit_660f (p, "punpckhwd", 0x69, a, b) -#define orc_sse_emit_punpckhdq(p,a,b) orc_sse_emit_660f (p, "punpckhdq", 0x6a, a, b) -#define orc_sse_emit_packssdw(p,a,b) orc_sse_emit_660f (p, "packssdw", 0x6b, a, b) -#define orc_sse_emit_punpcklqdq(p,a,b) orc_sse_emit_660f (p, "punpcklqdq", 0x6c, a, b) -#define orc_sse_emit_punpckhqdq(p,a,b) orc_sse_emit_660f (p, "punpckhqdq", 0x6d, a, b) - -#define orc_sse_emit_movdqa(p,a,b) orc_sse_emit_660f (p, "movdqa", 0x6f, a, b) - -#define orc_sse_emit_psraw(p,a,b) orc_sse_emit_shiftimm (p, "psraw", 0x71, 4, a, b) -#define orc_sse_emit_psrlw(p,a,b) orc_sse_emit_shiftimm (p, "psrlw", 0x71, 2, a, b) -#define orc_sse_emit_psllw(p,a,b) orc_sse_emit_shiftimm (p, "psllw", 0x71, 6, a, b) -#define orc_sse_emit_psrad(p,a,b) orc_sse_emit_shiftimm (p, "psrad", 0x72, 4, a, b) -#define orc_sse_emit_psrld(p,a,b) orc_sse_emit_shiftimm (p, "psrld", 0x72, 2, a, b) -#define orc_sse_emit_pslld(p,a,b) orc_sse_emit_shiftimm (p, "pslld", 0x72, 6, a, b) -#define 
orc_sse_emit_psrlq(p,a,b) orc_sse_emit_shiftimm (p, "psrlq", 0x73, 2, a, b) -#define orc_sse_emit_psllq(p,a,b) orc_sse_emit_shiftimm (p, "psllq", 0x73, 6, a, b) -#define orc_sse_emit_psrldq(p,a,b) orc_sse_emit_shiftimm (p, "psrldq", 0x73, 3, a, b) -#define orc_sse_emit_pslldq(p,a,b) orc_sse_emit_shiftimm (p, "pslldq", 0x73, 7, a, b) - -#define orc_sse_emit_psrlq_reg(p,a,b) orc_sse_emit_660f (p, "psrlq", 0xd3, a, b) - -#define orc_sse_emit_pcmpeqb(p,a,b) orc_sse_emit_660f (p, "pcmpeqb", 0x74, a, b) -#define orc_sse_emit_pcmpeqw(p,a,b) orc_sse_emit_660f (p, "pcmpeqw", 0x75, a, b) -#define orc_sse_emit_pcmpeqd(p,a,b) orc_sse_emit_660f (p, "pcmpeqd", 0x76, a, b) - - -#define orc_sse_emit_paddq(p,a,b) orc_sse_emit_660f (p, "paddq", 0xd4, a, b) -#define orc_sse_emit_pmullw(p,a,b) orc_sse_emit_660f (p, "pmullw", 0xd5, a, b) - -#define orc_sse_emit_psubusb(p,a,b) orc_sse_emit_660f (p, "psubusb", 0xd8, a, b) -#define orc_sse_emit_psubusw(p,a,b) orc_sse_emit_660f (p, "psubusw", 0xd9, a, b) -#define orc_sse_emit_pminub(p,a,b) orc_sse_emit_660f (p, "pminub", 0xda, a, b) -#define orc_sse_emit_pand(p,a,b) orc_sse_emit_660f (p, "pand", 0xdb, a, b) -#define orc_sse_emit_paddusb(p,a,b) orc_sse_emit_660f (p, "paddusb", 0xdc, a, b) -#define orc_sse_emit_paddusw(p,a,b) orc_sse_emit_660f (p, "paddusw", 0xdd, a, b) -#define orc_sse_emit_pmaxub(p,a,b) orc_sse_emit_660f (p, "pmaxub", 0xde, a, b) -#define orc_sse_emit_pandn(p,a,b) orc_sse_emit_660f (p, "pandn", 0xdf, a, b) - -#define orc_sse_emit_pavgb(p,a,b) orc_sse_emit_660f (p, "pavgb", 0xe0, a, b) -#define orc_sse_emit_pavgw(p,a,b) orc_sse_emit_660f (p, "pavgw", 0xe3, a, b) - -#define orc_sse_emit_pmulhuw(p,a,b) orc_sse_emit_660f (p, "pmulhuw", 0xe4, a, b) -#define orc_sse_emit_pmulhw(p,a,b) orc_sse_emit_660f (p, "pmulhw", 0xe5, a, b) - -#define orc_sse_emit_psubsb(p,a,b) orc_sse_emit_660f (p, "psubsb", 0xe8, a, b) -#define orc_sse_emit_psubsw(p,a,b) orc_sse_emit_660f (p, "psubsw", 0xe9, a, b) -#define orc_sse_emit_pminsw(p,a,b) 
orc_sse_emit_660f (p, "pminsw", 0xea, a, b) -#define orc_sse_emit_por(p,a,b) orc_sse_emit_660f (p, "por", 0xeb, a, b) -#define orc_sse_emit_paddsb(p,a,b) orc_sse_emit_660f (p, "paddsb", 0xec, a, b) -#define orc_sse_emit_paddsw(p,a,b) orc_sse_emit_660f (p, "paddsw", 0xed, a, b) -#define orc_sse_emit_pmaxsw(p,a,b) orc_sse_emit_660f (p, "pmaxsw", 0xee, a, b) -#define orc_sse_emit_pxor(p,a,b) orc_sse_emit_660f (p, "pxor", 0xef, a, b) - -#define orc_sse_emit_pmuludq(p,a,b) orc_sse_emit_660f (p, "pmuludq", 0xf4, a, b) -#define orc_sse_emit_pmaddwd(p,a,b) orc_sse_emit_660f (p, "pmaddwd", 0xf5, a, b) -#define orc_sse_emit_psadbw(p,a,b) orc_sse_emit_660f (p, "psadbw", 0xf6, a, b) - -#define orc_sse_emit_psubb(p,a,b) orc_sse_emit_660f (p, "psubb", 0xf8, a, b) -#define orc_sse_emit_psubw(p,a,b) orc_sse_emit_660f (p, "psubw", 0xf9, a, b) -#define orc_sse_emit_psubd(p,a,b) orc_sse_emit_660f (p, "psubd", 0xfa, a, b) -#define orc_sse_emit_psubq(p,a,b) orc_sse_emit_660f (p, "psubq", 0xfb, a, b) -#define orc_sse_emit_paddb(p,a,b) orc_sse_emit_660f (p, "paddb", 0xfc, a, b) -#define orc_sse_emit_paddw(p,a,b) orc_sse_emit_660f (p, "paddw", 0xfd, a, b) -#define orc_sse_emit_paddd(p,a,b) orc_sse_emit_660f (p, "paddd", 0xfe, a, b) - -/* SSE3 instructions */ - -/* SSSE3 instructions */ -#define orc_sse_emit_pshufb(p,a,b) orc_sse_emit_660f (p, "pshufb", 0x3800, a, b) -#define orc_sse_emit_phaddw(p,a,b) orc_sse_emit_660f (p, "phaddw", 0x3801, a, b) -#define orc_sse_emit_phaddd(p,a,b) orc_sse_emit_660f (p, "phaddd", 0x3802, a, b) -#define orc_sse_emit_phaddsw(p,a,b) orc_sse_emit_660f (p, "phaddsw", 0x3803, a, b) -#define orc_sse_emit_pmaddubsw(p,a,b) orc_sse_emit_660f (p, "pmaddubsw", 0x3804, a, b) -#define orc_sse_emit_phsubw(p,a,b) orc_sse_emit_660f (p, "phsubw", 0x3805, a, b) -#define orc_sse_emit_phsubd(p,a,b) orc_sse_emit_660f (p, "phsubd", 0x3806, a, b) -#define orc_sse_emit_phsubsw(p,a,b) orc_sse_emit_660f (p, "phsubsw", 0x3807, a, b) -#define orc_sse_emit_psignb(p,a,b) 
orc_sse_emit_660f (p, "psignb", 0x3808, a, b) -#define orc_sse_emit_psignw(p,a,b) orc_sse_emit_660f (p, "psignw", 0x3809, a, b) -#define orc_sse_emit_psignd(p,a,b) orc_sse_emit_660f (p, "psignd", 0x380a, a, b) -#define orc_sse_emit_pmulhrsw(p,a,b) orc_sse_emit_660f (p, "pmulhrsw", 0x380b, a, b) - -#define orc_sse_emit_pabsb(p,a,b) orc_sse_emit_660f (p, "pabsb", 0x381c, a, b) -#define orc_sse_emit_pabsw(p,a,b) orc_sse_emit_660f (p, "pabsw", 0x381d, a, b) -#define orc_sse_emit_pabsd(p,a,b) orc_sse_emit_660f (p, "pabsd", 0x381e, a, b) - - -/* SSE4.1 instructions */ -#define orc_sse_emit_pmovsxbw(p,a,b) orc_sse_emit_660f (p, "pmovsxbw", 0x3820, a, b) -#define orc_sse_emit_pmovsxbd(p,a,b) orc_sse_emit_660f (p, "pmovsxbd", 0x3821, a, b) -#define orc_sse_emit_pmovsxbq(p,a,b) orc_sse_emit_660f (p, "pmovsxbq", 0x3822, a, b) -#define orc_sse_emit_pmovsxwd(p,a,b) orc_sse_emit_660f (p, "pmovsxwd", 0x3823, a, b) -#define orc_sse_emit_pmovsxwq(p,a,b) orc_sse_emit_660f (p, "pmovsxwq", 0x3824, a, b) -#define orc_sse_emit_pmovsxdq(p,a,b) orc_sse_emit_660f (p, "pmovsxdq", 0x3825, a, b) - -#define orc_sse_emit_pmuldq(p,a,b) orc_sse_emit_660f (p, "pmuldq", 0x3828, a, b) -#define orc_sse_emit_pcmpeqq(p,a,b) orc_sse_emit_660f (p, "pcmpeqq", 0x3829, a, b) - -#define orc_sse_emit_packusdw(p,a,b) orc_sse_emit_660f (p, "packusdw", 0x382b, a, b) - -#define orc_sse_emit_pmovzxbw(p,a,b) orc_sse_emit_660f (p, "pmovzxbw", 0x3830, a, b) -#define orc_sse_emit_pmovzxbd(p,a,b) orc_sse_emit_660f (p, "pmovzxbd", 0x3831, a, b) -#define orc_sse_emit_pmovzxbq(p,a,b) orc_sse_emit_660f (p, "pmovzxbq", 0x3832, a, b) -#define orc_sse_emit_pmovzxwd(p,a,b) orc_sse_emit_660f (p, "pmovzxwd", 0x3833, a, b) -#define orc_sse_emit_pmovzxwq(p,a,b) orc_sse_emit_660f (p, "pmovzxwq", 0x3834, a, b) -#define orc_sse_emit_pmovzxdq(p,a,b) orc_sse_emit_660f (p, "pmovzxdq", 0x3835, a, b) - -#define orc_sse_emit_pmulld(p,a,b) orc_sse_emit_660f (p, "pmulld", 0x3840, a, b) -#define orc_sse_emit_phminposuw(p,a,b) 
orc_sse_emit_660f (p, "phminposuw", 0x3841, a, b) - -#define orc_sse_emit_pminsb(p,a,b) orc_sse_emit_660f (p, "pminsb", 0x3838, a, b) -#define orc_sse_emit_pminsd(p,a,b) orc_sse_emit_660f (p, "pminsd", 0x3839, a, b) -#define orc_sse_emit_pminuw(p,a,b) orc_sse_emit_660f (p, "pminuw", 0x383a, a, b) -#define orc_sse_emit_pminud(p,a,b) orc_sse_emit_660f (p, "pminud", 0x383b, a, b) -#define orc_sse_emit_pmaxsb(p,a,b) orc_sse_emit_660f (p, "pmaxsb", 0x383c, a, b) -#define orc_sse_emit_pmaxsd(p,a,b) orc_sse_emit_660f (p, "pmaxsd", 0x383d, a, b) -#define orc_sse_emit_pmaxuw(p,a,b) orc_sse_emit_660f (p, "pmaxuw", 0x383e, a, b) -#define orc_sse_emit_pmaxud(p,a,b) orc_sse_emit_660f (p, "pmaxud", 0x383f, a, b) - -/* SSE4.2 instructions */ -#define orc_sse_emit_pcmpgtq(p,a,b) orc_sse_emit_660f (p, "pcmpgtq", 0x3837, a, b) #endif diff --git a/orc/orcsysinsn.h b/orc/orcsysinsn.h new file mode 100644 index 0000000..3411f3c --- /dev/null +++ b/orc/orcsysinsn.h @@ -0,0 +1,35 @@ + +#ifndef _ORC_ORC_SYSINSN_H_ +#define _ORC_ORC_SYSINSN_H_ + +typedef struct _OrcSysInsn OrcSysInsn; +typedef struct _OrcSysOpcode OrcSysOpcode; + +struct _OrcSysInsn { + int opcode; + int dest_reg; + int src1_reg; + int src2_reg; + + int immediate; + + int mem_reg; + int memoffset; + int indexreg; + int shift; +}; + +struct _OrcSysOpcode { + char name[16]; + int type; + int flags; + orc_uint32 code; + int code2; +}; + + +#define ORC_SYS_OPCODE_FLAG_FIXED (1<<0) + + +#endif + diff --git a/orc/orcx86insn.c b/orc/orcx86insn.c new file mode 100644 index 0000000..b54892e --- /dev/null +++ b/orc/orcx86insn.c @@ -0,0 +1,207 @@ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + + +OrcSysOpcode orc_x86_opcodes[] = { + { "punpcklbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f60 }, + { "punpcklwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f61 }, + { "punpckldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f62 }, + { "packsswb", ORC_X86_INSN_TYPE_SD, 0, 0x660f63 }, + { "pcmpgtb", ORC_X86_INSN_TYPE_SD, 0, 0x660f64 }, + { 
"pcmpgtw", ORC_X86_INSN_TYPE_SD, 0, 0x660f65 }, + { "pcmpgtd", ORC_X86_INSN_TYPE_SD, 0, 0x660f66 }, + { "packuswb", ORC_X86_INSN_TYPE_SD, 0, 0x660f67 }, + { "punpckhbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f68 }, + { "punpckhwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f69 }, + { "punpckhdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6a }, + { "packssdw", ORC_X86_INSN_TYPE_SD, 0, 0x660f6b }, + { "punpcklqdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6c }, + { "punpckhqdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f6d }, + { "movdqa", ORC_X86_INSN_TYPE_SD, 0, 0x660f6f }, + { "psraw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe1 }, + { "psrlw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd1 }, + { "psllw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff1 }, + { "psrad", ORC_X86_INSN_TYPE_SD, 0, 0x660fe2 }, + { "psrld", ORC_X86_INSN_TYPE_SD, 0, 0x660fd2 }, + { "pslld", ORC_X86_INSN_TYPE_SD, 0, 0x660ff2 }, + { "psrlq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd3 }, + { "psllq", ORC_X86_INSN_TYPE_SD, 0, 0x660ff3 }, + { "psrldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f73 }, + { "pslldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f73 }, + { "psrlq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd3 }, + { "pcmpeqb", ORC_X86_INSN_TYPE_SD, 0, 0x660f74 }, + { "pcmpeqw", ORC_X86_INSN_TYPE_SD, 0, 0x660f75 }, + { "pcmpeqd", ORC_X86_INSN_TYPE_SD, 0, 0x660f76 }, + { "paddq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd4 }, + { "pmullw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd5 }, + { "psubusb", ORC_X86_INSN_TYPE_SD, 0, 0x660fd8 }, + { "psubusw", ORC_X86_INSN_TYPE_SD, 0, 0x660fd9 }, + { "pminub", ORC_X86_INSN_TYPE_SD, 0, 0x660fda }, + { "pand", ORC_X86_INSN_TYPE_SD, 0, 0x660fdb }, + { "paddusb", ORC_X86_INSN_TYPE_SD, 0, 0x660fdc }, + { "paddusw", ORC_X86_INSN_TYPE_SD, 0, 0x660fdd }, + { "pmaxub", ORC_X86_INSN_TYPE_SD, 0, 0x660fde }, + { "pandn", ORC_X86_INSN_TYPE_SD, 0, 0x660fdf }, + { "pavgb", ORC_X86_INSN_TYPE_SD, 0, 0x660fe0 }, + { "pavgw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe3 }, + { "pmulhuw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe4 }, + { "pmulhw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe5 }, + { "psubsb", ORC_X86_INSN_TYPE_SD, 0, 0x660fe8 
}, + { "psubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fe9 }, + { "pminsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fea }, + { "por", ORC_X86_INSN_TYPE_SD, 0, 0x660feb }, + { "paddsb", ORC_X86_INSN_TYPE_SD, 0, 0x660fec }, + { "paddsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fed }, + { "pmaxsw", ORC_X86_INSN_TYPE_SD, 0, 0x660fee }, + { "pxor", ORC_X86_INSN_TYPE_SD, 0, 0x660fef }, + { "pmuludq", ORC_X86_INSN_TYPE_SD, 0, 0x660ff4 }, + { "pmaddwd", ORC_X86_INSN_TYPE_SD, 0, 0x660ff5 }, + { "psadbw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff6 }, + { "psubb", ORC_X86_INSN_TYPE_SD, 0, 0x660ff8 }, + { "psubw", ORC_X86_INSN_TYPE_SD, 0, 0x660ff9 }, + { "psubd", ORC_X86_INSN_TYPE_SD, 0, 0x660ffa }, + { "psubq", ORC_X86_INSN_TYPE_SD, 0, 0x660ffb }, + { "paddb", ORC_X86_INSN_TYPE_SD, 0, 0x660ffc }, + { "paddw", ORC_X86_INSN_TYPE_SD, 0, 0x660ffd }, + { "paddd", ORC_X86_INSN_TYPE_SD, 0, 0x660ffe }, + { "pshufb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3800 }, + { "phaddw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3801 }, + { "phaddd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3802 }, + { "phaddsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3803 }, + { "pmaddubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3804 }, + { "phsubw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3805 }, + { "phsubd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3806 }, + { "phsubsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3807 }, + { "psignb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3808 }, + { "psignw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3809 }, + { "psignd", ORC_X86_INSN_TYPE_SD, 0, 0x660f380a }, + { "pmulhrsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f380b }, + { "pabsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f381c }, + { "pabsw", ORC_X86_INSN_TYPE_SD, 0, 0x660f381d }, + { "pabsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f381e }, + { "pmovsxbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3820 }, + { "pmovsxbd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3821 }, + { "pmovsxbq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3822 }, + { "pmovsxwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3823 }, + { "pmovsxwq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3824 }, + { "pmovsxdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3825 }, + { 
"pmuldq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3828 }, + { "pcmpeqq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3829 }, + { "packusdw", ORC_X86_INSN_TYPE_SD, 0, 0x660f382b }, + { "pmovzxbw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3830 }, + { "pmovzxbd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3831 }, + { "pmovzxbq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3832 }, + { "pmovzxwd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3833 }, + { "pmovzxwq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3834 }, + { "pmovzxdq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3835 }, + { "pmulld", ORC_X86_INSN_TYPE_SD, 0, 0x660f3840 }, + { "phminposuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f3841 }, + { "pminsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f3838 }, + { "pminsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f3839 }, + { "pminuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f383a }, + { "pminud", ORC_X86_INSN_TYPE_SD, 0, 0x660f383b }, + { "pmaxsb", ORC_X86_INSN_TYPE_SD, 0, 0x660f383c }, + { "pmaxsd", ORC_X86_INSN_TYPE_SD, 0, 0x660f383d }, + { "pmaxuw", ORC_X86_INSN_TYPE_SD, 0, 0x660f383e }, + { "pmaxud", ORC_X86_INSN_TYPE_SD, 0, 0x660f383f }, + { "pcmpgtq", ORC_X86_INSN_TYPE_SD, 0, 0x660f3837 }, + { "addps", ORC_X86_INSN_TYPE_SD, 0, 0x0f58 }, + { "subps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5c }, + { "mulps", ORC_X86_INSN_TYPE_SD, 0, 0x0f59 }, + { "divps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5e }, + { "sqrtps", ORC_X86_INSN_TYPE_SD, 0, 0x0f51 }, + { "addpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 }, + { "subpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5c }, + { "mulpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f59 }, + { "divpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5e }, + { "sqrtpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f51 }, + { "cmpeqps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 0 }, + { "cmpeqpd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 0 }, + { "cmpltps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 1 }, + { "cmpltpd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 1 }, + { "cmpleps", ORC_X86_INSN_TYPE_SD2, 0, 0x0fc2, 2 }, + { "cmplepd", ORC_X86_INSN_TYPE_SD2, 0, 0x660fc2, 2 }, + { "cvttps2dq", ORC_X86_INSN_TYPE_SD, 0, 0xf30f5b }, + { "cvttpd2dq", ORC_X86_INSN_TYPE_SD, 0, 
0x660fe6 },
+  { "cvtdq2ps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5b },
+  { "cvtdq2pd", ORC_X86_INSN_TYPE_SD, 0, 0xf30fe6 },
+  { "cvtps2pd", ORC_X86_INSN_TYPE_SD, 0, 0x0f5a },
+  { "cvtpd2ps", ORC_X86_INSN_TYPE_SD, 0, 0x660f5a },
+  { "minps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5d },
+  { "minpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5d },
+  { "maxps", ORC_X86_INSN_TYPE_SD, 0, 0x0f5f },
+  { "maxpd", ORC_X86_INSN_TYPE_SD, 0, 0x660f5f },
+  { "psraw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 4 },
+  { "psrlw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 2 },
+  { "psllw", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f71, 6 },
+  { "psrad", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 4 },
+  { "psrld", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 2 },
+  { "pslld", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f72, 6 },
+  { "psrlq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 2 },
+  { "psllq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 6 },
+  { "psrldq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 3 },
+  { "pslldq", ORC_X86_INSN_TYPE_SHIFTIMM, 0, 0x660f73, 7 },
+
+  //{ "", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+  //{ "", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+  //{ "addps", ORC_X86_INSN_TYPE_SD, 0, 0x660f58 },
+};
+/* Emit orc_x86_opcodes[index]: log the asm text, then write the encoding at p->codeptr.
+ * SD/SD2: src and dest are registers.  SHIFTIMM: src is the immediate count, dest the register. */
+void
+orc_sse_emit_sysinsn (OrcCompiler *p, int index, int src, int dest)
+{
+  OrcSysOpcode *opcode = orc_x86_opcodes + index;
+  int imm = (opcode->type == ORC_X86_INSN_TYPE_SHIFTIMM); /* src is a count, not a register */
+  switch (opcode->type) {
+    case ORC_X86_INSN_TYPE_SD:
+    case ORC_X86_INSN_TYPE_SD2:
+      ORC_ASM_CODE(p," %s %%%s, %%%s\n", opcode->name,
+          orc_x86_get_regname_sse(src),
+          orc_x86_get_regname_sse(dest));
+      break;
+    case ORC_X86_INSN_TYPE_SHIFTIMM:
+      ORC_ASM_CODE(p," %s $%d, %%%s\n", opcode->name,
+          src,
+          orc_x86_get_regname_sse(dest));
+      break;
+  }
+  /* REX goes after a 66/f3 prefix byte but before the 0x0f escape; its operands mirror the modrm call below (NOTE(review): confirm orc_x86_emit_rex argument order). */
+  if (opcode->code & 0xff000000) {
+    *p->codeptr++ = (opcode->code >> 24) & 0xff;
+    orc_x86_emit_rex (p, 0, imm ? 0 : dest, 0, imm ? dest : src);
+    *p->codeptr++ = (opcode->code >> 16) & 0xff;
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  } else if (opcode->code & 0xff0000) {
+    *p->codeptr++ = (opcode->code >> 16) & 0xff;
+    orc_x86_emit_rex (p, 0, imm ? 0 : dest, 0, imm ? dest : src);
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  } else {
+    orc_x86_emit_rex (p, 0, imm ? 0 : dest, 0, imm ? dest : src); /* no prefix byte: REX comes first */
+    *p->codeptr++ = (opcode->code >> 8) & 0xff;
+    *p->codeptr++ = (opcode->code >> 0) & 0xff;
+  }
+  /* ModRM byte, followed by the immediate (SHIFTIMM) or the code2 selector byte (SD2). */
+  switch (opcode->type) {
+    case ORC_X86_INSN_TYPE_SD:
+      orc_x86_emit_modrm_reg (p, src, dest);
+      break;
+    case ORC_X86_INSN_TYPE_SHIFTIMM:
+      orc_x86_emit_modrm_reg (p, dest, opcode->code2);
+      *p->codeptr++ = src;
+      break;
+    case ORC_X86_INSN_TYPE_SD2:
+      orc_x86_emit_modrm_reg (p, src, dest);
+      *p->codeptr++ = opcode->code2;
+      break;
+  }
+
+}
+
diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h
new file mode 100644
index 0000000..fb385ff
--- /dev/null
+++ b/orc/orcx86insn.h
@@ -0,0 +1,292 @@
+
+#ifndef ORC_ORC_X86_INSN_H_
+#define ORC_ORC_X86_INSN_H_
+
+#define ORC_X86_INSN_TYPE_SD 0
+#define ORC_X86_INSN_TYPE_SHIFTIMM 1
+#define ORC_X86_INSN_TYPE_SD2 2
+
+enum {
+  ORC_X86_punpcklbw,
+  ORC_X86_punpcklwd,
+  ORC_X86_punpckldq,
+  ORC_X86_packsswb,
+  ORC_X86_pcmpgtb,
+  ORC_X86_pcmpgtw,
+  ORC_X86_pcmpgtd,
+  ORC_X86_packuswb,
+  ORC_X86_punpckhbw,
+  ORC_X86_punpckhwd,
+  ORC_X86_punpckhdq,
+  ORC_X86_packssdw,
+  ORC_X86_punpcklqdq,
+  ORC_X86_punpckhqdq,
+  ORC_X86_movdqa,
+  ORC_X86_psraw,
+  ORC_X86_psrlw,
+  ORC_X86_psllw,
+  ORC_X86_psrad,
+  ORC_X86_psrld,
+  ORC_X86_pslld,
+  ORC_X86_psrlq,
+  ORC_X86_psllq,
+  ORC_X86_psrldq,
+  ORC_X86_pslldq,
+  ORC_X86_psrlq_reg,
+  ORC_X86_pcmpeqb,
+  ORC_X86_pcmpeqw,
+  ORC_X86_pcmpeqd,
+  ORC_X86_paddq,
+  ORC_X86_pmullw,
+  ORC_X86_psubusb,
+  ORC_X86_psubusw,
+  ORC_X86_pminub,
+  ORC_X86_pand,
+  ORC_X86_paddusb,
+  ORC_X86_paddusw,
+  ORC_X86_pmaxub,
+  ORC_X86_pandn,
+  ORC_X86_pavgb,
+  ORC_X86_pavgw,
+  ORC_X86_pmulhuw,
+  ORC_X86_pmulhw,
+  ORC_X86_psubsb,
+  ORC_X86_psubsw,
+  ORC_X86_pminsw,
+  ORC_X86_por,
+  ORC_X86_paddsb,
+  ORC_X86_paddsw,
+  ORC_X86_pmaxsw,
+  ORC_X86_pxor,
+  ORC_X86_pmuludq,
+  ORC_X86_pmaddwd,
+  ORC_X86_psadbw,
+  ORC_X86_psubb,
+  ORC_X86_psubw,
+
ORC_X86_psubd, + ORC_X86_psubq, + ORC_X86_paddb, + ORC_X86_paddw, + ORC_X86_paddd, + ORC_X86_pshufb, + ORC_X86_phaddw, + ORC_X86_phaddd, + ORC_X86_phaddsw, + ORC_X86_pmaddubsw, + ORC_X86_phsubw, + ORC_X86_phsubd, + ORC_X86_phsubsw, + ORC_X86_psignb, + ORC_X86_psignw, + ORC_X86_psignd, + ORC_X86_pmulhrsw, + ORC_X86_pabsb, + ORC_X86_pabsw, + ORC_X86_pabsd, + ORC_X86_pmovsxbw, + ORC_X86_pmovsxbd, + ORC_X86_pmovsxbq, + ORC_X86_pmovsxwd, + ORC_X86_pmovsxwq, + ORC_X86_pmovsxdq, + ORC_X86_pmuldq, + ORC_X86_pcmpeqq, + ORC_X86_packusdw, + ORC_X86_pmovzxbw, + ORC_X86_pmovzxbd, + ORC_X86_pmovzxbq, + ORC_X86_pmovzxwd, + ORC_X86_pmovzxwq, + ORC_X86_pmovzxdq, + ORC_X86_pmulld, + ORC_X86_phminposuw, + ORC_X86_pminsb, + ORC_X86_pminsd, + ORC_X86_pminuw, + ORC_X86_pminud, + ORC_X86_pmaxsb, + ORC_X86_pmaxsd, + ORC_X86_pmaxuw, + ORC_X86_pmaxud, + ORC_X86_pcmpgtq, + ORC_X86_addps, + ORC_X86_subps, + ORC_X86_mulps, + ORC_X86_divps, + ORC_X86_sqrtps, + ORC_X86_addpd, + ORC_X86_subpd, + ORC_X86_mulpd, + ORC_X86_divpd, + ORC_X86_sqrtpd, + ORC_X86_cmpeqps, + ORC_X86_cmpeqpd, + ORC_X86_cmpltps, + ORC_X86_cmpltpd, + ORC_X86_cmpleps, + ORC_X86_cmplepd, + ORC_X86_cvttps2dq, + ORC_X86_cvttpd2dq, + ORC_X86_cvtdq2ps, + ORC_X86_cvtdq2pd, + ORC_X86_cvtps2pd, + ORC_X86_cvtpd2ps, + ORC_X86_minps, + ORC_X86_minpd, + ORC_X86_maxps, + ORC_X86_maxpd, + ORC_X86_psraw_imm, + ORC_X86_psrlw_imm, + ORC_X86_psllw_imm, + ORC_X86_psrad_imm, + ORC_X86_psrld_imm, + ORC_X86_pslld_imm, + ORC_X86_psrlq_imm, + ORC_X86_psllq_imm, + ORC_X86_psrldq_imm, + ORC_X86_pslldq_imm, +}; + +/* Per-opcode convenience wrappers: orc_sse_emit_<op>(p,a,b) expands to orc_sse_emit_sysinsn(p, ORC_X86_<op>, a, b); p, a and b are forwarded unchanged. */ + +#define orc_sse_emit_punpcklbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklbw, a, b) +#define orc_sse_emit_punpcklwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklwd, a, b) +#define orc_sse_emit_punpckldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckldq, a, b) +#define orc_sse_emit_packsswb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packsswb, a, b) +#define orc_sse_emit_pcmpgtb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtb, a, b) +#define
orc_sse_emit_pcmpgtw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtw, a, b) +#define orc_sse_emit_pcmpgtd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtd, a, b) +#define orc_sse_emit_packuswb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packuswb, a, b) +#define orc_sse_emit_punpckhbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhbw, a, b) +#define orc_sse_emit_punpckhwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhwd, a, b) +#define orc_sse_emit_punpckhdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhdq, a, b) +#define orc_sse_emit_packssdw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packssdw, a, b) +#define orc_sse_emit_punpcklqdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpcklqdq, a, b) +#define orc_sse_emit_punpckhqdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_punpckhqdq, a, b) +#define orc_sse_emit_movdqa(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_movdqa, a, b) +//#define orc_sse_emit_psraw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psraw, a, b) -- NOTE: the non-_imm shift wrappers are left disabled in this section; the _imm variants and psrlq_reg are defined instead +//#define orc_sse_emit_psrlw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlw, a, b) +//#define orc_sse_emit_psllw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllw, a, b) +//#define orc_sse_emit_psrad(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrad, a, b) +//#define orc_sse_emit_psrld(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrld, a, b) +//#define orc_sse_emit_pslld(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslld, a, b) +//#define orc_sse_emit_psrlq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlq, a, b) +//#define orc_sse_emit_psllq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllq, a, b) +//#define orc_sse_emit_psrldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrldq, a, b) +//#define orc_sse_emit_pslldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslldq, a, b) +#define orc_sse_emit_psrlq_reg(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlq_reg, a, b) +#define orc_sse_emit_pcmpeqb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqb, a, b) +#define orc_sse_emit_pcmpeqw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqw, a, b) +#define orc_sse_emit_pcmpeqd(p,a,b) orc_sse_emit_sysinsn(p,
ORC_X86_pcmpeqd, a, b) +#define orc_sse_emit_paddq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddq, a, b) +#define orc_sse_emit_pmullw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmullw, a, b) +#define orc_sse_emit_psubusb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubusb, a, b) +#define orc_sse_emit_psubusw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubusw, a, b) +#define orc_sse_emit_pminub(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminub, a, b) +#define orc_sse_emit_pand(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pand, a, b) +#define orc_sse_emit_paddusb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddusb, a, b) +#define orc_sse_emit_paddusw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddusw, a, b) +#define orc_sse_emit_pmaxub(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxub, a, b) +#define orc_sse_emit_pandn(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pandn, a, b) +#define orc_sse_emit_pavgb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pavgb, a, b) +#define orc_sse_emit_pavgw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pavgw, a, b) +#define orc_sse_emit_pmulhuw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhuw, a, b) +#define orc_sse_emit_pmulhw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhw, a, b) +#define orc_sse_emit_psubsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubsb, a, b) +#define orc_sse_emit_psubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubsw, a, b) +#define orc_sse_emit_pminsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminsw, a, b) +#define orc_sse_emit_por(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_por, a, b) +#define orc_sse_emit_paddsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddsb, a, b) +#define orc_sse_emit_paddsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddsw, a, b) +#define orc_sse_emit_pmaxsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxsw, a, b) +#define orc_sse_emit_pxor(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pxor, a, b) +#define orc_sse_emit_pmuludq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmuludq, a, b) +#define orc_sse_emit_pmaddwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaddwd, a, b) +#define
orc_sse_emit_psadbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psadbw, a, b) +#define orc_sse_emit_psubb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubb, a, b) +#define orc_sse_emit_psubw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubw, a, b) +#define orc_sse_emit_psubd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubd, a, b) +#define orc_sse_emit_psubq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psubq, a, b) +#define orc_sse_emit_paddb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddb, a, b) +#define orc_sse_emit_paddw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddw, a, b) +#define orc_sse_emit_paddd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_paddd, a, b) +#define orc_sse_emit_pshufb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pshufb, a, b) +#define orc_sse_emit_phaddw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddw, a, b) +#define orc_sse_emit_phaddd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddd, a, b) +#define orc_sse_emit_phaddsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phaddsw, a, b) +#define orc_sse_emit_pmaddubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaddubsw, a, b) +#define orc_sse_emit_phsubw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubw, a, b) +#define orc_sse_emit_phsubd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubd, a, b) +#define orc_sse_emit_phsubsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phsubsw, a, b) +#define orc_sse_emit_psignb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignb, a, b) +#define orc_sse_emit_psignw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignw, a, b) +#define orc_sse_emit_psignd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psignd, a, b) +#define orc_sse_emit_pmulhrsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulhrsw, a, b) +#define orc_sse_emit_pabsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pabsb, a, b) +#define orc_sse_emit_pabsw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pabsw, a, b) +#define orc_sse_emit_pabsd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pabsd, a, b) +#define orc_sse_emit_pmovsxbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovsxbw, a, b) +#define orc_sse_emit_pmovsxbd(p,a,b)
orc_sse_emit_sysinsn(p, ORC_X86_pmovsxbd, a, b) +#define orc_sse_emit_pmovsxbq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovsxbq, a, b) +#define orc_sse_emit_pmovsxwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovsxwd, a, b) +#define orc_sse_emit_pmovsxwq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovsxwq, a, b) +#define orc_sse_emit_pmovsxdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovsxdq, a, b) +#define orc_sse_emit_pmuldq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmuldq, a, b) +#define orc_sse_emit_pcmpeqq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpeqq, a, b) +#define orc_sse_emit_packusdw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_packusdw, a, b) +#define orc_sse_emit_pmovzxbw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxbw, a, b) +#define orc_sse_emit_pmovzxbd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxbd, a, b) +#define orc_sse_emit_pmovzxbq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxbq, a, b) +#define orc_sse_emit_pmovzxwd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxwd, a, b) +#define orc_sse_emit_pmovzxwq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxwq, a, b) +#define orc_sse_emit_pmovzxdq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmovzxdq, a, b) +#define orc_sse_emit_pmulld(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmulld, a, b) +#define orc_sse_emit_phminposuw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_phminposuw, a, b) +#define orc_sse_emit_pminsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminsb, a, b) +#define orc_sse_emit_pminsd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminsd, a, b) +#define orc_sse_emit_pminuw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminuw, a, b) +#define orc_sse_emit_pminud(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pminud, a, b) +#define orc_sse_emit_pmaxsb(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxsb, a, b) +#define orc_sse_emit_pmaxsd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxsd, a, b) +#define orc_sse_emit_pmaxuw(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxuw, a, b) +#define orc_sse_emit_pmaxud(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pmaxud, a, b) +#define
orc_sse_emit_pcmpgtq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pcmpgtq, a, b) +#define orc_sse_emit_addps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_addps, a, b) +#define orc_sse_emit_subps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_subps, a, b) +#define orc_sse_emit_mulps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_mulps, a, b) +#define orc_sse_emit_divps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_divps, a, b) +#define orc_sse_emit_sqrtps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_sqrtps, a, b) +#define orc_sse_emit_addpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_addpd, a, b) +#define orc_sse_emit_subpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_subpd, a, b) +#define orc_sse_emit_mulpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_mulpd, a, b) +#define orc_sse_emit_divpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_divpd, a, b) +#define orc_sse_emit_sqrtpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_sqrtpd, a, b) +#define orc_sse_emit_cmpeqps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmpeqps, a, b) +#define orc_sse_emit_cmpeqpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmpeqpd, a, b) +#define orc_sse_emit_cmpltps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmpltps, a, b) +#define orc_sse_emit_cmpltpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmpltpd, a, b) +#define orc_sse_emit_cmpleps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmpleps, a, b) +#define orc_sse_emit_cmplepd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cmplepd, a, b) +#define orc_sse_emit_cvttps2dq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvttps2dq, a, b) +#define orc_sse_emit_cvttpd2dq(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvttpd2dq, a, b) +#define orc_sse_emit_cvtdq2ps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvtdq2ps, a, b) +#define orc_sse_emit_cvtdq2pd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvtdq2pd, a, b) +#define orc_sse_emit_cvtps2pd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvtps2pd, a, b) +#define orc_sse_emit_cvtpd2ps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_cvtpd2ps, a, b) +#define orc_sse_emit_minps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_minps, a, b) +#define
orc_sse_emit_minpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_minpd, a, b) +#define orc_sse_emit_maxps(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_maxps, a, b) +#define orc_sse_emit_maxpd(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_maxpd, a, b) +#define orc_sse_emit_psraw_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psraw_imm, a, b) /* _imm wrappers: call sites in this patch pass the shift count as a and the register as b, e.g. orc_sse_emit_psrld_imm (p, 16, dest) */ +#define orc_sse_emit_psrlw_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlw_imm, a, b) +#define orc_sse_emit_psllw_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllw_imm, a, b) +#define orc_sse_emit_psrad_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrad_imm, a, b) +#define orc_sse_emit_psrld_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrld_imm, a, b) +#define orc_sse_emit_pslld_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslld_imm, a, b) +#define orc_sse_emit_psrlq_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrlq_imm, a, b) +#define orc_sse_emit_psllq_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psllq_imm, a, b) +#define orc_sse_emit_psrldq_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psrldq_imm, a, b) +#define orc_sse_emit_pslldq_imm(p,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pslldq_imm, a, b) + +#endif + -- 2.7.4