From b6a2ca3c864f63377a84c3a5b4fa4cd933549ec8 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Thu, 12 May 2011 14:38:40 -0700 Subject: [PATCH] x86insn: Convert mmx backend to x86insn --- orc/orcmmx.c | 181 ++++++++---------------------- orc/orcmmx.h | 140 +---------------------- orc/orcprogram-mmx.c | 12 +- orc/orcprogram-sse.c | 6 + orc/orcrules-mmx.c | 62 +++------- orc/orcrules-sse.c | 34 +----- orc/orcsse.c | 2 + orc/orcsse.h | 2 +- orc/orcx86insn.c | 312 +++++++++++++++++++++++++++++---------------------- orc/orcx86insn.h | 194 ++++++++++++++++++++++++++++++-- 10 files changed, 450 insertions(+), 495 deletions(-) diff --git a/orc/orcmmx.c b/orc/orcmmx.c index 5e31387..70f00f6 100644 --- a/orc/orcmmx.c +++ b/orc/orcmmx.c @@ -37,118 +37,7 @@ orc_x86_get_regname_mmx(int i) } } -void -orc_mmx_emit_f20f (OrcCompiler *p, const char *insn_name, int code, - int src, int dest) -{ - ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, - orc_x86_get_regname_mmx(src), - orc_x86_get_regname_mmx(dest)); - *p->codeptr++ = 0xf2; - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - *p->codeptr++ = code; - orc_x86_emit_modrm_reg (p, src, dest); -} - -void -orc_mmx_emit_f30f (OrcCompiler *p, const char *insn_name, int code, - int src, int dest) -{ - ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, - orc_x86_get_regname_mmx(src), - orc_x86_get_regname_mmx(dest)); - *p->codeptr++ = 0xf3; - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - *p->codeptr++ = code; - orc_x86_emit_modrm_reg (p, src, dest); -} - -void -orc_mmx_emit_0f (OrcCompiler *p, const char *insn_name, int code, - int src, int dest) -{ - ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, - orc_x86_get_regname_mmx(src), - orc_x86_get_regname_mmx(dest)); - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - *p->codeptr++ = code; - orc_x86_emit_modrm_reg (p, src, dest); -} - -void -orc_mmx_emit_660f (OrcCompiler *p, const char *insn_name, int code, - int src, int dest) -{ - 
ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name, - orc_x86_get_regname_mmx(src), - orc_x86_get_regname_mmx(dest)); - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - if (code & 0xff00) { - *p->codeptr++ = code >> 8; - } - *p->codeptr++ = code & 0xff; - orc_x86_emit_modrm_reg (p, src, dest); -} - -void -orc_mmx_emit_pshufw (OrcCompiler *p, int shuf, int src, int dest) -{ - ORC_ASM_CODE(p," pshufw $0x%04x, %%%s, %%%s\n", shuf, - orc_x86_get_regname_mmx(src), - orc_x86_get_regname_mmx(dest)); - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - *p->codeptr++ = 0x70; - orc_x86_emit_modrm_reg (p, src, dest); - *p->codeptr++ = shuf; -} - -void -orc_mmx_emit_pinsrw_memoffset (OrcCompiler *p, int imm, int offset, - int src, int dest) -{ - ORC_ASM_CODE(p," pinsrw $%d, %d(%%%s), %%%s\n", imm, offset, - orc_x86_get_regname_ptr(p, src), - orc_x86_get_regname_mmx(dest)); - orc_x86_emit_rex (p, 0, dest, 0, src); - *p->codeptr++ = 0x0f; - *p->codeptr++ = 0xc4; - orc_x86_emit_modrm_memoffset (p, offset, src, dest); - *p->codeptr++ = imm; - -} - -void -orc_mmx_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src, - int offset, int dest) -{ - ORC_ASM_CODE(p," pextrw $%d, %%%s, %d(%%%s)\n", imm, - orc_x86_get_regname_ptr(p, src), - offset, orc_x86_get_regname_mmx(dest)); - orc_x86_emit_rex (p, 0, src, 0, dest); - *p->codeptr++ = 0x0f; - *p->codeptr++ = 0xc4; - orc_x86_emit_modrm_memoffset (p, offset, dest, src); - *p->codeptr++ = imm; -} - -void -orc_mmx_emit_shiftimm (OrcCompiler *p, const char *insn_name, int code, - int modrm_code, int shift, int reg) -{ - ORC_ASM_CODE(p," %s $%d, %%%s\n", insn_name, shift, - orc_x86_get_regname_mmx(reg)); - orc_x86_emit_rex (p, 0, 0, 0, reg); - *p->codeptr++ = 0x0f; - *p->codeptr++ = code; - orc_x86_emit_modrm_reg (p, reg, modrm_code); - *p->codeptr++ = shift; -} - +#if 0 void orc_x86_emit_mov_memindex_mmx (OrcCompiler *compiler, int size, int offset, int reg1, int regindex, int shift, int reg2, int is_aligned) @@ 
-230,35 +119,59 @@ orc_x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1, int o orc_x86_emit_modrm_memoffset (compiler, offset, reg2, reg1); } +#endif -void orc_x86_emit_mov_mmx_reg_reg (OrcCompiler *compiler, int reg1, int reg2) +void +orc_x86_emit_mov_memoffset_mmx (OrcCompiler *compiler, int size, int offset, + int reg1, int reg2, int is_aligned) { - ORC_ASM_CODE(compiler," movq %%%s, %%%s\n", orc_x86_get_regname_mmx(reg1), - orc_x86_get_regname_mmx(reg2)); - - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x6f; - orc_x86_emit_modrm_reg (compiler, reg1, reg2); + switch (size) { + case 4: + orc_mmx_emit_movd_load_memoffset (compiler, offset, reg1, reg2); + break; + case 8: + orc_mmx_emit_movq_load_memoffset (compiler, offset, reg1, reg2); + break; + default: + ORC_COMPILER_ERROR(compiler, "bad size"); + break; + } } -void orc_x86_emit_mov_reg_mmx (OrcCompiler *compiler, int reg1, int reg2) +void +orc_x86_emit_mov_memindex_mmx (OrcCompiler *compiler, int size, int offset, + int reg1, int regindex, int shift, int reg2, int is_aligned) { - ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", orc_x86_get_regname(reg1), - orc_x86_get_regname_mmx(reg2)); - orc_x86_emit_rex(compiler, 0, reg2, 0, reg1); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x6e; - orc_x86_emit_modrm_reg (compiler, reg1, reg2); + switch (size) { + case 4: + orc_mmx_emit_movd_load_memindex (compiler, offset, + reg1, regindex, shift, reg2); + break; + case 8: + orc_mmx_emit_movq_load_memindex (compiler, offset, + reg1, regindex, shift, reg2); + break; + default: + ORC_COMPILER_ERROR(compiler, "bad size"); + break; + } } -void orc_x86_emit_mov_mmx_reg (OrcCompiler *compiler, int reg1, int reg2) +void +orc_x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1, + int offset, int reg2, int aligned, int uncached) { - ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", orc_x86_get_regname_mmx(reg1), - 
orc_x86_get_regname(reg2)); - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x7e; - orc_x86_emit_modrm_reg (compiler, reg2, reg1); + switch (size) { + case 4: + orc_mmx_emit_movd_store_memoffset (compiler, offset, reg1, reg2); + break; + case 8: + orc_mmx_emit_movq_store_memoffset (compiler, offset, reg1, reg2); + break; + default: + ORC_COMPILER_ERROR(compiler, "bad size"); + break; + } + } diff --git a/orc/orcmmx.h b/orc/orcmmx.h index 0f6dc65..bdbd316 100644 --- a/orc/orcmmx.h +++ b/orc/orcmmx.h @@ -3,6 +3,7 @@ #define _ORC_MMX_H_ #include +#include ORC_BEGIN_DECLS @@ -41,6 +42,7 @@ void orc_x86_emit_mov_memindex_mmx (OrcCompiler *compiler, int size, int offset, int reg1, int regindex, int shift, int reg2, int is_aligned); void orc_x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, int reg2, int aligned, int uncached); +#if 0 void orc_x86_emit_mov_mmx_reg_reg (OrcCompiler *compiler, int reg1, int reg2); void orc_x86_emit_mov_reg_mmx (OrcCompiler *compiler, int reg1, int reg2); void orc_x86_emit_mov_mmx_reg (OrcCompiler *compiler, int reg1, int reg2); @@ -68,148 +70,12 @@ void orc_mmx_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src, int offset, int dest); void orc_mmx_emit_shiftimm (OrcCompiler *p, const char *insn_name, int code, int modrm_code, int shift, int reg); +#endif unsigned int orc_mmx_get_cpu_flags (void); void orc_mmx_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 value); -/* MMX instructions */ -#define orc_mmx_emit_punpcklbw(p,a,b) orc_mmx_emit_660f (p, "punpcklbw", 0x60, a, b) -#define orc_mmx_emit_punpcklwd(p,a,b) orc_mmx_emit_660f (p, "punpcklwd", 0x61, a, b) -#define orc_mmx_emit_punpckldq(p,a,b) orc_mmx_emit_660f (p, "punpckldq", 0x62, a, b) -#define orc_mmx_emit_packsswb(p,a,b) orc_mmx_emit_660f (p, "packsswb", 0x63, a, b) -#define orc_mmx_emit_pcmpgtb(p,a,b) orc_mmx_emit_660f (p, "pcmpgtb", 0x64, a, b) -#define 
orc_mmx_emit_pcmpgtw(p,a,b) orc_mmx_emit_660f (p, "pcmpgtw", 0x65, a, b) -#define orc_mmx_emit_pcmpgtd(p,a,b) orc_mmx_emit_660f (p, "pcmpgtd", 0x66, a, b) -#define orc_mmx_emit_packuswb(p,a,b) orc_mmx_emit_660f (p, "packuswb", 0x67, a, b) -#define orc_mmx_emit_punpckhbw(p,a,b) orc_mmx_emit_660f (p, "punpckhbw", 0x68, a, b) -#define orc_mmx_emit_punpckhwd(p,a,b) orc_mmx_emit_660f (p, "punpckhwd", 0x69, a, b) -#define orc_mmx_emit_punpckhdq(p,a,b) orc_mmx_emit_660f (p, "punpckhdq", 0x6a, a, b) -#define orc_mmx_emit_packssdw(p,a,b) orc_mmx_emit_660f (p, "packssdw", 0x6b, a, b) - -#define orc_mmx_emit_movq(p,a,b) orc_mmx_emit_660f (p, "movq", 0x6f, a, b) - -#define orc_mmx_emit_psraw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psraw", 0x71, 4, a, b) -#define orc_mmx_emit_psrlw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrlw", 0x71, 2, a, b) -#define orc_mmx_emit_psllw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psllw", 0x71, 6, a, b) - -#define orc_mmx_emit_psrad_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrad", 0x72, 4, a, b) -#define orc_mmx_emit_psrld_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrld", 0x72, 2, a, b) -#define orc_mmx_emit_pslld_imm(p,a,b) orc_mmx_emit_shiftimm (p, "pslld", 0x72, 6, a, b) - -#define orc_mmx_emit_psrlq_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrlq", 0x73, 2, a, b) -#define orc_mmx_emit_psllq_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psllq", 0x73, 6, a, b) - -#define orc_mmx_emit_pcmpeqb(p,a,b) orc_mmx_emit_660f (p, "pcmpeqb", 0x74, a, b) -#define orc_mmx_emit_pcmpeqw(p,a,b) orc_mmx_emit_660f (p, "pcmpeqw", 0x75, a, b) -#define orc_mmx_emit_pcmpeqd(p,a,b) orc_mmx_emit_660f (p, "pcmpeqd", 0x76, a, b) -#define orc_mmx_emit_emms(p) orc_x86_emit_emms (p) - -#define orc_mmx_emit_pmullw(p,a,b) orc_mmx_emit_660f (p, "pmullw", 0xd5, a, b) - -#define orc_mmx_emit_psubusb(p,a,b) orc_mmx_emit_660f (p, "psubusb", 0xd8, a, b) -#define orc_mmx_emit_psubusw(p,a,b) orc_mmx_emit_660f (p, "psubusw", 0xd9, a, b) -#define orc_mmx_emit_pand(p,a,b) orc_mmx_emit_660f (p, "pand", 0xdb, a, 
b) -#define orc_mmx_emit_paddusb(p,a,b) orc_mmx_emit_660f (p, "paddusb", 0xdc, a, b) -#define orc_mmx_emit_paddusw(p,a,b) orc_mmx_emit_660f (p, "paddusw", 0xdd, a, b) -#define orc_mmx_emit_pandn(p,a,b) orc_mmx_emit_660f (p, "pandn", 0xdf, a, b) - -#define orc_mmx_emit_pmulhw(p,a,b) orc_mmx_emit_660f (p, "pmulhw", 0xe5, a, b) - -#define orc_mmx_emit_psubsb(p,a,b) orc_mmx_emit_660f (p, "psubsb", 0xe8, a, b) -#define orc_mmx_emit_psubsw(p,a,b) orc_mmx_emit_660f (p, "psubsw", 0xe9, a, b) -#define orc_mmx_emit_por(p,a,b) orc_mmx_emit_660f (p, "por", 0xeb, a, b) -#define orc_mmx_emit_paddsb(p,a,b) orc_mmx_emit_660f (p, "paddsb", 0xec, a, b) -#define orc_mmx_emit_paddsw(p,a,b) orc_mmx_emit_660f (p, "paddsw", 0xed, a, b) -#define orc_mmx_emit_pxor(p,a,b) orc_mmx_emit_660f (p, "pxor", 0xef, a, b) - -#define orc_mmx_emit_pmaddwd(p,a,b) orc_mmx_emit_660f (p, "pmaddwd", 0xf5, a, b) - -#define orc_mmx_emit_psubb(p,a,b) orc_mmx_emit_660f (p, "psubb", 0xf8, a, b) -#define orc_mmx_emit_psubw(p,a,b) orc_mmx_emit_660f (p, "psubw", 0xf9, a, b) -#define orc_mmx_emit_psubd(p,a,b) orc_mmx_emit_660f (p, "psubd", 0xfa, a, b) - -#define orc_mmx_emit_paddb(p,a,b) orc_mmx_emit_660f (p, "paddb", 0xfc, a, b) -#define orc_mmx_emit_paddw(p,a,b) orc_mmx_emit_660f (p, "paddw", 0xfd, a, b) -#define orc_mmx_emit_paddd(p,a,b) orc_mmx_emit_660f (p, "paddd", 0xfe, a, b) - -/* MMX EXT instructions */ -/* MMX SSE instructions */ -#define orc_mmx_emit_pinsrw(p,a,b) orc_mmx_emit_660f (p, "pinsrw", 0xc4, a, b) -#define orc_mmx_emit_pextrw(p,a,b) orc_mmx_emit_660f (p, "pextrw", 0xc5, a, b) -#define orc_mmx_emit_pminub(p,a,b) orc_mmx_emit_660f (p, "pminub", 0xda, a, b) -#define orc_mmx_emit_pmaxub(p,a,b) orc_mmx_emit_660f (p, "pmaxub", 0xde, a, b) -#define orc_mmx_emit_pavgb(p,a,b) orc_mmx_emit_660f (p, "pavgb", 0xe0, a, b) -#define orc_mmx_emit_pavgw(p,a,b) orc_mmx_emit_660f (p, "pavgw", 0xe3, a, b) -#define orc_mmx_emit_pmulhuw(p,a,b) orc_mmx_emit_660f (p, "pmulhuw", 0xe4, a, b) -#define 
orc_mmx_emit_pminsw(p,a,b) orc_mmx_emit_660f (p, "pminsw", 0xea, a, b) -#define orc_mmx_emit_pmaxsw(p,a,b) orc_mmx_emit_660f (p, "pmaxsw", 0xee, a, b) -#define orc_mmx_emit_psadbw(p,a,b) orc_mmx_emit_660f (p, "psadbw", 0xf6, a, b) - -/* 3DNow! instrunctions, FIXME, not correct */ -#define orc_mmx_emit_pmulhrw(p,a,b) orc_mmx_emit_660f (p, "pmulhrw", 0xb7, a, b) -#define orc_mmx_emit_pavgusb(p,a,b) orc_mmx_emit_660f (p, "pavgusb", 0xbf, a, b) - -/* MMX SSE2 instructions */ -#define orc_mmx_emit_paddq(p,a,b) orc_mmx_emit_660f (p, "paddq", 0xd4, a, b) -#define orc_mmx_emit_psubq(p,a,b) orc_mmx_emit_660f (p, "psubq", 0xfb, a, b) -#define orc_mmx_emit_pmuludq(p,a,b) orc_mmx_emit_660f (p, "pmuludq", 0xf4, a, b) - -/* MMX SSE3 instructions */ - -/* MMX SSSE3 instructions */ -#define orc_mmx_emit_pshufb(p,a,b) orc_mmx_emit_660f (p, "pshufb", 0x3800, a, b) -#define orc_mmx_emit_phaddw(p,a,b) orc_mmx_emit_660f (p, "phaddw", 0x3801, a, b) -#define orc_mmx_emit_phaddd(p,a,b) orc_mmx_emit_660f (p, "phaddd", 0x3802, a, b) -#define orc_mmx_emit_phaddsw(p,a,b) orc_mmx_emit_660f (p, "phaddsw", 0x3803, a, b) -#define orc_mmx_emit_pmaddubsw(p,a,b) orc_mmx_emit_660f (p, "pmaddubsw", 0x3804, a, b) -#define orc_mmx_emit_phsubw(p,a,b) orc_mmx_emit_660f (p, "phsubw", 0x3805, a, b) -#define orc_mmx_emit_phsubd(p,a,b) orc_mmx_emit_660f (p, "phsubd", 0x3806, a, b) -#define orc_mmx_emit_phsubsw(p,a,b) orc_mmx_emit_660f (p, "phsubsw", 0x3807, a, b) -#define orc_mmx_emit_psignb(p,a,b) orc_mmx_emit_660f (p, "psignb", 0x3808, a, b) -#define orc_mmx_emit_psignw(p,a,b) orc_mmx_emit_660f (p, "psignw", 0x3809, a, b) -#define orc_mmx_emit_psignd(p,a,b) orc_mmx_emit_660f (p, "psignd", 0x380a, a, b) -#define orc_mmx_emit_pmulhrsw(p,a,b) orc_mmx_emit_660f (p, "pmulhrsw", 0x380b, a, b) - -#define orc_mmx_emit_pabsb(p,a,b) orc_mmx_emit_660f (p, "pabsb", 0x381c, a, b) -#define orc_mmx_emit_pabsw(p,a,b) orc_mmx_emit_660f (p, "pabsw", 0x381d, a, b) -#define orc_mmx_emit_pabsd(p,a,b) orc_mmx_emit_660f (p, 
"pabsd", 0x381e, a, b) - -/* MMX SSE4.1 instructions */ -#define orc_mmx_emit_pmovsxbw(p,a,b) orc_mmx_emit_660f (p, "pmovsxbw", 0x3820, a, b) -#define orc_mmx_emit_pmovsxbd(p,a,b) orc_mmx_emit_660f (p, "pmovsxbd", 0x3821, a, b) -#define orc_mmx_emit_pmovsxbq(p,a,b) orc_mmx_emit_660f (p, "pmovsxbq", 0x3822, a, b) -#define orc_mmx_emit_pmovsxwd(p,a,b) orc_mmx_emit_660f (p, "pmovsxwd", 0x3823, a, b) -#define orc_mmx_emit_pmovsxwq(p,a,b) orc_mmx_emit_660f (p, "pmovsxwq", 0x3824, a, b) -#define orc_mmx_emit_pmovsxdq(p,a,b) orc_mmx_emit_660f (p, "pmovsxdq", 0x3825, a, b) - -#define orc_mmx_emit_pmuldq(p,a,b) orc_mmx_emit_660f (p, "pmuldq", 0x3828, a, b) -#define orc_mmx_emit_pcmpeqq(p,a,b) orc_mmx_emit_660f (p, "pcmpeqq", 0x3829, a, b) - -#define orc_mmx_emit_packusdw(p,a,b) orc_mmx_emit_660f (p, "packusdw", 0x382b, a, b) - -#define orc_mmx_emit_pmovzxbw(p,a,b) orc_mmx_emit_660f (p, "pmovzxbw", 0x3830, a, b) -#define orc_mmx_emit_pmovzxbd(p,a,b) orc_mmx_emit_660f (p, "pmovzxbd", 0x3831, a, b) -#define orc_mmx_emit_pmovzxbq(p,a,b) orc_mmx_emit_660f (p, "pmovzxbq", 0x3832, a, b) -#define orc_mmx_emit_pmovzxwd(p,a,b) orc_mmx_emit_660f (p, "pmovzxwd", 0x3833, a, b) -#define orc_mmx_emit_pmovzxwq(p,a,b) orc_mmx_emit_660f (p, "pmovzxwq", 0x3834, a, b) -#define orc_mmx_emit_pmovzxdq(p,a,b) orc_mmx_emit_660f (p, "pmovzxdq", 0x3835, a, b) - -#define orc_mmx_emit_pmulld(p,a,b) orc_mmx_emit_660f (p, "pmuldq", 0x3840, a, b) -#define orc_mmx_emit_phminposuw(p,a,b) orc_mmx_emit_660f (p, "phminposuw", 0x3841, a, b) - -#define orc_mmx_emit_pminsb(p,a,b) orc_mmx_emit_660f (p, "pminsb", 0x3838, a, b) -#define orc_mmx_emit_pminsd(p,a,b) orc_mmx_emit_660f (p, "pminsd", 0x3839, a, b) -#define orc_mmx_emit_pminuw(p,a,b) orc_mmx_emit_660f (p, "pminuw", 0x383a, a, b) -#define orc_mmx_emit_pminud(p,a,b) orc_mmx_emit_660f (p, "pminud", 0x383b, a, b) -#define orc_mmx_emit_pmaxsb(p,a,b) orc_mmx_emit_660f (p, "pmaxsb", 0x383c, a, b) -#define orc_mmx_emit_pmaxsd(p,a,b) orc_mmx_emit_660f (p, "pmaxsd", 
0x383d, a, b) -#define orc_mmx_emit_pmaxuw(p,a,b) orc_mmx_emit_660f (p, "pmaxuw", 0x383e, a, b) -#define orc_mmx_emit_pmaxud(p,a,b) orc_mmx_emit_660f (p, "pmaxud", 0x383f, a, b) - -/* SSE4.2 instructions */ -#define orc_mmx_emit_pcmpgtq(p,a,b) orc_mmx_emit_660f (p, "pcmpgtq", 0x3837, a, b) - #endif ORC_END_DECLS diff --git a/orc/orcprogram-mmx.c b/orc/orcprogram-mmx.c index 258e0ba..d081a8c 100644 --- a/orc/orcprogram-mmx.c +++ b/orc/orcprogram-mmx.c @@ -318,7 +318,7 @@ mmx_save_accumulators (OrcCompiler *compiler) } if (compiler->vars[i].size == 2) { - orc_x86_emit_mov_mmx_reg (compiler, src, compiler->gp_tmpreg); + orc_mmx_emit_movd_store_register (compiler, src, compiler->gp_tmpreg); orc_x86_emit_and_imm_reg (compiler, 4, 0xffff, compiler->gp_tmpreg); orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, (int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[i-ORC_VAR_A1]), @@ -429,7 +429,7 @@ orc_mmx_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 valu } orc_x86_emit_mov_imm_reg (compiler, 4, value, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, reg); + orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, reg); #ifndef MMX orc_mmx_emit_pshufd (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg); #else @@ -1024,9 +1024,15 @@ orc_mmx_emit_loop (OrcCompiler *compiler, int offset, int update) if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) && compiler->vars[insn->dest_args[0]].alloc != compiler->vars[insn->src_args[0]].alloc) { - orc_x86_emit_mov_mmx_reg_reg (compiler, +#ifdef MMX + orc_mmx_emit_movq (compiler, compiler->vars[insn->src_args[0]].alloc, compiler->vars[insn->dest_args[0]].alloc); +#else + orc_mmx_emit_movdqu (compiler, + compiler->vars[insn->src_args[0]].alloc, + compiler->vars[insn->dest_args[0]].alloc); +#endif } rule->emit (compiler, rule->emit_user, insn); } else { diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 
3f86924..8447462 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -1024,9 +1024,15 @@ orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) && compiler->vars[insn->dest_args[0]].alloc != compiler->vars[insn->src_args[0]].alloc) { +#ifdef MMX + orc_sse_emit_movq (compiler, + compiler->vars[insn->src_args[0]].alloc, + compiler->vars[insn->dest_args[0]].alloc); +#else orc_sse_emit_movdqu (compiler, compiler->vars[insn->src_args[0]].alloc, compiler->vars[insn->dest_args[0]].alloc); +#endif } rule->emit (compiler, rule->emit_user, insn); } else { diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c index 923405b..ef281d2 100644 --- a/orc/orcrules-mmx.c +++ b/orc/orcrules-mmx.c @@ -32,7 +32,7 @@ mmx_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), compiler->exec_reg, reg, FALSE); #ifndef MMX - orc_x86_emit_movhps_memoffset_mmx (compiler, + orc_mmx_emit_movhps_load_memoffset (compiler, (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]), compiler->exec_reg, reg); @@ -106,7 +106,7 @@ mmx_rule_loadX (OrcCompiler *compiler, void *user, OrcInstruction *insn) case 1: orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc); + orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc); break; case 2: orc_mmx_emit_pxor (compiler, dest->alloc, dest->alloc); @@ -161,7 +161,7 @@ mmx_rule_loadoffX (OrcCompiler *compiler, void *user, OrcInstruction *insn) case 1: orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc); + orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc); break; case 2: 
orc_mmx_emit_pxor (compiler, dest->alloc, dest->alloc); @@ -271,7 +271,7 @@ mmx_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn) case 2: orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc); + orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc); break; case 4: orc_mmx_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc); @@ -330,7 +330,7 @@ mmx_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn) if (ptr_reg == compiler->gp_tmpreg) { ORC_COMPILER_ERROR(compiler,"unimplemented"); } - orc_x86_emit_mov_mmx_reg (compiler, src->alloc, compiler->gp_tmpreg); + orc_mmx_emit_movd_store_register (compiler, src->alloc, compiler->gp_tmpreg); orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, offset, ptr_reg); break; @@ -343,7 +343,7 @@ mmx_rule_storeX (OrcCompiler *compiler, void *user, OrcInstruction *insn) if (ptr_reg == compiler->gp_tmpreg) { ORC_COMPILER_ERROR(compiler,"unimplemented"); } - orc_x86_emit_mov_mmx_reg (compiler, src->alloc, compiler->gp_tmpreg); + orc_mmx_emit_movd_store_register (compiler, src->alloc, compiler->gp_tmpreg); orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, offset, ptr_reg); } @@ -378,7 +378,7 @@ mmx_rule_ldresnearl (OrcCompiler *compiler, void *user, OrcInstruction *insn) int tmp2 = orc_compiler_get_temp_reg (compiler); int tmpc; - orc_x86_emit_mov_mmx_reg (compiler, X86_MM6, compiler->gp_tmpreg); + orc_mmx_emit_movd_store_register (compiler, X86_MM6, compiler->gp_tmpreg); orc_x86_emit_sar_imm_reg (compiler, 4, 16, compiler->gp_tmpreg); orc_mmx_emit_movdqu_load_memindex (compiler, 0, src->ptr_register, @@ -490,7 +490,7 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_mmx_emit_pshufd (compiler, ORC_MMX_SHUF(3,2,3,2), tmp, tmp2); orc_mmx_emit_psubw (compiler, tmp, tmp2); - orc_x86_emit_mov_reg_mmx 
(compiler, src->ptr_offset, tmp); + orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp); orc_mmx_emit_pshuflw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp); orc_mmx_emit_psrlw_imm (compiler, 8, tmp); orc_mmx_emit_pmullw (compiler, tmp2, tmp); @@ -525,7 +525,7 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) for(i=0;i<(1<loop_shift);i+=2){ orc_x86_emit_mov_memoffset_mmx (compiler, 8, 0, src->ptr_register, tmp, FALSE); - orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp4); + orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp4); if (compiler->vars[increment_var].vartype == ORC_VAR_TYPE_PARAM) { orc_x86_emit_add_memoffset_reg (compiler, 4, @@ -621,7 +621,7 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_mmx_emit_punpcklbw (compiler, zero, tmp2); orc_mmx_emit_psubw (compiler, tmp, tmp2); - orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp); + orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp); orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp); orc_mmx_emit_psrlw_imm (compiler, 8, tmp); orc_mmx_emit_pmullw (compiler, tmp2, tmp); @@ -849,12 +849,12 @@ mmx_rule_signX_ssse3 (OrcCompiler *p, void *user, OrcInstruction *insn) tmpc = orc_compiler_get_temp_constant (p, 1<vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type], - p->vars[insn->src_args[1]].value.i, - p->vars[insn->dest_args[0]].alloc); - } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { - int tmp = orc_compiler_get_temp_reg (p); - - /* FIXME this is a gross hack to reload the register with a - * 64-bit version of the parameter. 
*/ - orc_x86_emit_mov_memoffset_mmx (p, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]), - p->exec_reg, tmp, FALSE); - - orc_mmx_emit_660f (p, code[type], reg_code[type], tmp, - p->vars[insn->dest_args[0]].alloc); - } else { - ORC_COMPILER_ERROR(p,"rule only works with constants or params"); - p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE; - } -} -#else static void mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -972,8 +941,8 @@ mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) ORC_X86_psrad_imm, ORC_X86_psllq_imm, ORC_X86_psrlq_imm }; if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_x86_emit_cpuinsn (p, opcodes_imm[type], - p->vars[insn->src_args[1]].value.i, 0, + orc_x86_emit_cpuinsn_imm (p, opcodes_imm[type], + p->vars[insn->src_args[1]].value.i, 16, p->vars[insn->dest_args[0]].alloc); } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { int tmp = orc_compiler_get_temp_reg (p); @@ -984,14 +953,13 @@ mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]), p->exec_reg, tmp, FALSE); - orc_x86_emit_cpuinsn (p, opcodes[type], 0, tmp, + orc_x86_emit_cpuinsn_size (p, opcodes[type], 16, tmp, p->vars[insn->dest_args[0]].alloc); } else { ORC_COMPILER_ERROR(p,"rule only works with constants or params"); p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE; } } -#endif static void mmx_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn) diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index ce8d75c..5ac0f32 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -621,7 +621,7 @@ mmx_rule_ldreslinl (OrcCompiler *compiler, void *user, OrcInstruction *insn) orc_mmx_emit_punpcklbw (compiler, zero, tmp2); orc_mmx_emit_psubw (compiler, tmp, tmp2); - orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp); + orc_sse_emit_movd_load_register (compiler, src->ptr_offset, tmp); orc_mmx_emit_pshufw 
(compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp); orc_mmx_emit_psrlw_imm (compiler, 8, tmp); orc_mmx_emit_pmullw (compiler, tmp2, tmp); @@ -925,37 +925,6 @@ sse_rule_absl_slow (OrcCompiler *p, void *user, OrcInstruction *insn) } -#ifdef MMX -static void -sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) -{ - int type = ORC_PTR_TO_INT(user); - int imm_code1[] = { 0x71, 0x71, 0x71, 0x72, 0x72, 0x72, 0x73, 0x73 }; - int imm_code2[] = { 6, 2, 4, 6, 2, 4, 6, 2 }; - int reg_code[] = { 0xf1, 0xd1, 0xe1, 0xf2, 0xd2, 0xe2, 0xf3, 0xd3 }; - const char *code[] = { "psllw", "psrlw", "psraw", "pslld", "psrld", "psrad", "psllq", "psrlq" }; - - if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) { - orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type], - p->vars[insn->src_args[1]].value.i, - p->vars[insn->dest_args[0]].alloc); - } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { - int tmp = orc_compiler_get_temp_reg (p); - - /* FIXME this is a gross hack to reload the register with a - * 64-bit version of the parameter. 
*/ - orc_x86_emit_mov_memoffset_sse (p, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]), - p->exec_reg, tmp, FALSE); - - orc_mmx_emit_660f (p, code[type], reg_code[type], tmp, - p->vars[insn->dest_args[0]].alloc); - } else { - ORC_COMPILER_ERROR(p,"rule only works with constants or params"); - p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE; - } -} -#else static void sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) { @@ -991,7 +960,6 @@ sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn) p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE; } } -#endif static void sse_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn) diff --git a/orc/orcsse.c b/orc/orcsse.c index f14f398..128654f 100644 --- a/orc/orcsse.c +++ b/orc/orcsse.c @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -28,6 +29,7 @@ orc_x86_get_regname_sse(int i) }; if (i>=X86_XMM0 && i=X86_MM0 && icode != 0); if (opcode->prefix != 0) { - *p->codeptr++ = opcode->prefix; + if (opcode->prefix == 0x01) { + if (is_sse) { + *p->codeptr++ = 0x66; + } + } else { + *p->codeptr++ = opcode->prefix; + } } orc_x86_emit_rex (p, size, dest, 0, src); if (opcode->code & 0xff0000) { @@ -268,12 +277,29 @@ output_opcode (OrcCompiler *p, const OrcSysOpcode *opcode, int size, *p->codeptr++ = (opcode->code >> 0) & 0xff; } +const char * +orc_x86_get_regname_mmxsse (int reg, int is_sse) +{ + if (is_sse) { + return orc_x86_get_regname_sse (reg); + } else { + return orc_x86_get_regname_mmx (reg); + } +} + +int +is_sse_reg (int reg) +{ + return (reg >= X86_XMM0) && (reg <= X86_XMM15); +} + void orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) { char imm_str[40] = { 0 }; char op1_str[40] = { 0 }; char op2_str[40] = { 0 }; + int is_sse; if (xinsn->opcode->type == ORC_X86_INSN_TYPE_ALIGN) { if (xinsn->size > 0) ORC_ASM_CODE(p,".p2align %d\n", xinsn->size); @@ -284,10 +310,16 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) return; } + 
is_sse = FALSE; + if (is_sse_reg (xinsn->src) || is_sse_reg (xinsn->dest)) { + is_sse = TRUE; + } + switch (xinsn->opcode->type) { case ORC_X86_INSN_TYPE_MMXM_MMX: case ORC_X86_INSN_TYPE_SSEM_SSE: case ORC_X86_INSN_TYPE_MMXM_MMX_REV: + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: case ORC_X86_INSN_TYPE_REGM_MMX: case ORC_X86_INSN_TYPE_MMX_REGM_REV: case ORC_X86_INSN_TYPE_REGM_REG: @@ -323,7 +355,8 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_SSEM_SSE: case ORC_X86_INSN_TYPE_IMM8_MMXM_MMX: if (xinsn->type == ORC_X86_RM_REG) { - sprintf(op1_str, "%%%s, ", orc_x86_get_regname_sse (xinsn->src)); + sprintf(op1_str, "%%%s, ", + orc_x86_get_regname_mmxsse (xinsn->src, is_sse)); } else if (xinsn->type == ORC_X86_RM_MEMOFFSET) { sprintf(op1_str, "%d(%%%s), ", xinsn->offset, orc_x86_get_regname_ptr (p, xinsn->src)); @@ -337,8 +370,10 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) } break; case ORC_X86_INSN_TYPE_MMXM_MMX_REV: /* FIXME misnamed */ + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: case ORC_X86_INSN_TYPE_MMX_REGM_REV: - sprintf(op1_str, "%%%s, ", orc_x86_get_regname_sse (xinsn->src)); + sprintf(op1_str, "%%%s, ", + orc_x86_get_regname_mmxsse (xinsn->src, is_sse)); break; case ORC_X86_INSN_TYPE_REGM_MMX: case ORC_X86_INSN_TYPE_REGM_REG: @@ -394,11 +429,14 @@ orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_IMM8_REGM_MMX: case ORC_X86_INSN_TYPE_REGM_MMX: case ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT: - sprintf(op2_str, "%%%s", orc_x86_get_regname_sse (xinsn->dest)); + sprintf(op2_str, "%%%s", + orc_x86_get_regname_mmxsse (xinsn->dest, is_sse)); break; case ORC_X86_INSN_TYPE_MMXM_MMX_REV: + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: if (xinsn->type == ORC_X86_RM_REG) { - sprintf(op2_str, "%%%s", orc_x86_get_regname_sse (xinsn->dest)); + sprintf(op2_str, "%%%s", + orc_x86_get_regname_mmxsse (xinsn->dest, is_sse)); } else if (xinsn->type == ORC_X86_RM_MEMOFFSET) { sprintf(op2_str, "%d(%%%s)", xinsn->offset, 
orc_x86_get_regname_ptr (p, xinsn->dest)); @@ -500,45 +538,53 @@ orc_uint8 nop_codes[][16] = { void orc_x86_insn_output_opcode (OrcCompiler *p, OrcX86Insn *xinsn) { + int is_sse; + + is_sse = FALSE; + if (is_sse_reg (xinsn->src) || is_sse_reg (xinsn->dest)) { + is_sse = TRUE; + } + switch (xinsn->opcode->type) { case ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT: - output_opcode (p, xinsn->opcode, 4, xinsn->dest, 0); + output_opcode (p, xinsn->opcode, 4, xinsn->dest, 0, is_sse); break; case ORC_X86_INSN_TYPE_MMX_REGM_REV: case ORC_X86_INSN_TYPE_MMXM_MMX_REV: - output_opcode (p, xinsn->opcode, 4, xinsn->dest, xinsn->src); + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: + output_opcode (p, xinsn->opcode, 4, xinsn->dest, xinsn->src, is_sse); break; case ORC_X86_INSN_TYPE_REG_REGM: case ORC_X86_INSN_TYPE_IMM8_REGM: case ORC_X86_INSN_TYPE_IMM32_REGM: case ORC_X86_INSN_TYPE_REG8_REGM: case ORC_X86_INSN_TYPE_REG16_REGM: - output_opcode (p, xinsn->opcode, xinsn->size, xinsn->dest, xinsn->src); + output_opcode (p, xinsn->opcode, xinsn->size, xinsn->dest, xinsn->src, FALSE); break; case ORC_X86_INSN_TYPE_IMM8_MMXM_MMX: case ORC_X86_INSN_TYPE_MMXM_MMX: case ORC_X86_INSN_TYPE_SSEM_SSE: case ORC_X86_INSN_TYPE_REGM_MMX: - output_opcode (p, xinsn->opcode, 4, xinsn->src, xinsn->dest); + output_opcode (p, xinsn->opcode, 4, xinsn->src, xinsn->dest, is_sse); break; case ORC_X86_INSN_TYPE_IMM8_REGM_MMX: case ORC_X86_INSN_TYPE_MEM: case ORC_X86_INSN_TYPE_REGM_REG: case ORC_X86_INSN_TYPE_STACK: - output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest); + output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest, FALSE); break; case ORC_X86_INSN_TYPE_REGM: - output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest); + output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest, FALSE); break; case ORC_X86_INSN_TYPE_IMM32_REGM_MOV: orc_x86_emit_rex (p, xinsn->size, 0, 0, xinsn->dest); *p->codeptr++ = xinsn->opcode->code + (xinsn->dest&7); break; case 
ORC_X86_INSN_TYPE_NONE: - output_opcode (p, xinsn->opcode, 4, 0, 0); + output_opcode (p, xinsn->opcode, 4, 0, 0, FALSE); break; case ORC_X86_INSN_TYPE_IMM32_A: - output_opcode (p, xinsn->opcode, xinsn->size, 0, 0); + output_opcode (p, xinsn->opcode, xinsn->size, 0, 0, FALSE); break; case ORC_X86_INSN_TYPE_ALIGN: { @@ -583,6 +629,7 @@ orc_x86_insn_output_modrm (OrcCompiler *p, OrcX86Insn *xinsn) break; case ORC_X86_INSN_TYPE_REG_REGM: case ORC_X86_INSN_TYPE_MMXM_MMX_REV: + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: case ORC_X86_INSN_TYPE_MMX_REGM_REV: case ORC_X86_INSN_TYPE_REG8_REGM: case ORC_X86_INSN_TYPE_REG16_REGM: @@ -691,6 +738,7 @@ orc_x86_insn_output_immediate (OrcCompiler *p, OrcX86Insn *xinsn) case ORC_X86_INSN_TYPE_MMXM_MMX: case ORC_X86_INSN_TYPE_REG_REGM: case ORC_X86_INSN_TYPE_MMXM_MMX_REV: + case ORC_X86_INSN_TYPE_SSEM_SSE_REV: case ORC_X86_INSN_TYPE_MMX_REGM_REV: case ORC_X86_INSN_TYPE_REG8_REGM: case ORC_X86_INSN_TYPE_REG16_REGM: diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h index ad4be89..d109c95 100644 --- a/orc/orcx86insn.h +++ b/orc/orcx86insn.h @@ -15,6 +15,7 @@ typedef enum { ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, ORC_X86_INSN_TYPE_IMM8_MMX_REG_REV, ORC_X86_INSN_TYPE_MMXM_MMX_REV, + ORC_X86_INSN_TYPE_SSEM_SSE_REV, ORC_X86_INSN_TYPE_REGM_MMX, ORC_X86_INSN_TYPE_MMX_REGM_REV, ORC_X86_INSN_TYPE_REGM, @@ -180,13 +181,13 @@ typedef enum { ORC_X86_palignr, ORC_X86_pinsrw, ORC_X86_movd_load, - ORC_X86_movq_load, + ORC_X86_movq_sse_load, ORC_X86_movdqa_load, ORC_X86_movdqu_load, ORC_X86_movhps_load, ORC_X86_pextrw, ORC_X86_movd_store, - ORC_X86_movq_store, + ORC_X86_movq_sse_store, ORC_X86_movdqa_store, ORC_X86_movdqu_store, ORC_X86_movntdq_store, @@ -273,6 +274,9 @@ typedef enum { ORC_X86_sar, ORC_X86_and_imm32_a, ORC_X86_ALIGN, + ORC_X86_pshufw, + ORC_X86_movq_mmx_load, + ORC_X86_movq_mmx_store, } OrcX86Opcode; enum { @@ -449,39 +453,213 @@ void orc_x86_calculate_offsets (OrcCompiler *p); #define orc_sse_emit_pinsrw_memoffset(p,imm,offset,a,b) 
orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_pinsrw, 4, imm, offset, a, b) #define orc_sse_emit_movd_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movd_load, 4, 0, offset, a, b) -#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_load, 4, 0, offset, a, b) +#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_sse_load, 4, 0, offset, a, b) #define orc_sse_emit_movdqa_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movdqa_load, 4, 0, offset, a, b) #define orc_sse_emit_movdqu_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movdqu_load, 4, 0, offset, a, b) #define orc_sse_emit_movhps_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movhps_load, 4, 0, offset, a, b) #define orc_sse_emit_pextrw_memoffset(p,imm,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_pextrw, 16, imm, a, offset, b) #define orc_sse_emit_movd_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movd_store, 16, 0, a, offset, b) -#define orc_sse_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_store, 16, 0, a, offset, b) +#define orc_sse_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_sse_store, 16, 0, a, offset, b) #define orc_sse_emit_movdqa_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqa_store, 16, 0, a, offset, b) #define orc_sse_emit_movdqu_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqu_store, 16, 0, a, offset, b) #define orc_sse_emit_movntdq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movntdq_store, 16, 0, a, offset, b) #define orc_sse_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, 
imm, offset, a, a_index, shift, b) #define orc_sse_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b) -#define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_load, 4, 0, offset, a, a_index, shift, b) +#define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_sse_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_movdqa_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqa_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_movdqu_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqu_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_movhps_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movhps_load, 4, 0, offset, a, a_index, shift, b) #define orc_sse_emit_pextrw_memindex(p,imm,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_pextrw, imm, a, offset, b, b_index, shift) #define orc_sse_emit_movd_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movd_store, 0, a, offset, b, b_index, shift) -#define orc_sse_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_store, 0, a, offset, b, b_index, shift) +#define orc_sse_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_sse_store, 0, a, offset, b, b_index, shift) #define orc_sse_emit_movdqa_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movdqa_store, 0, a, offset, b, b_index, shift) #define orc_sse_emit_movdqu_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movdqu_store, 0, a, offset, b, b_index, shift) 
#define orc_sse_emit_movntdq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movntdq_store, 0, a, offset, b, b_index, shift) #define orc_sse_emit_pinsrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pinsrw, imm, a, b) #define orc_sse_emit_movd_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_load, 4, a, b) -#define orc_sse_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_load, 4, a, b) +#define orc_sse_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_sse_load, 4, a, b) #define orc_sse_emit_pextrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pextrw, imm, a, b) #define orc_sse_emit_movd_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_store, 4, a, b) -#define orc_sse_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_store, 4, a, b) +#define orc_sse_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_sse_store, 4, a, b) + + + + +#define orc_mmx_emit_punpcklbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklbw, 8, a, b) +#define orc_mmx_emit_punpcklwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklwd, 8, a, b) +#define orc_mmx_emit_punpckldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckldq, 8, a, b) +#define orc_mmx_emit_packsswb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packsswb, 8, a, b) +#define orc_mmx_emit_pcmpgtb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtb, 8, a, b) +#define orc_mmx_emit_pcmpgtw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtw, 8, a, b) +#define orc_mmx_emit_pcmpgtd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtd, 8, a, b) +#define orc_mmx_emit_packuswb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packuswb, 8, a, b) +#define orc_mmx_emit_punpckhbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhbw, 8, a, b) +#define orc_mmx_emit_punpckhwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhwd, 8, a, b) +#define 
orc_mmx_emit_punpckhdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhdq, 8, a, b) +#define orc_mmx_emit_packssdw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packssdw, 8, a, b) +#define orc_mmx_emit_punpcklqdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklqdq, 8, a, b) +#define orc_mmx_emit_punpckhqdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhqdq, 8, a, b) +#define orc_mmx_emit_psraw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psraw, 8, a, b) +#define orc_mmx_emit_psrlw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlw, 8, a, b) +#define orc_mmx_emit_psllw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psllw, 8, a, b) +#define orc_mmx_emit_psrad(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrad, 8, a, b) +#define orc_mmx_emit_psrld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrld, 8, a, b) +#define orc_mmx_emit_pslld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pslld, 8, a, b) +#define orc_mmx_emit_psrlq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlq, 8, a, b) +#define orc_mmx_emit_psllq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psllq, 8, a, b) +#define orc_mmx_emit_psrldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrldq, 8, a, b) +#define orc_mmx_emit_pslldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pslldq, 8, a, b) +#define orc_mmx_emit_psrlq_reg(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlq_reg, 8, a, b) +#define orc_mmx_emit_pcmpeqb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqb, 8, a, b) +#define orc_mmx_emit_pcmpeqw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqw, 8, a, b) +#define orc_mmx_emit_pcmpeqd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqd, 8, a, b) +#define orc_mmx_emit_paddq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddq, 8, a, b) +#define orc_mmx_emit_pmullw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmullw, 8, a, b) +#define orc_mmx_emit_psubusb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubusb, 8, a, b) +#define orc_mmx_emit_psubusw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubusw, 8, a, b) 
+#define orc_mmx_emit_pminub(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminub, 8, a, b) +#define orc_mmx_emit_pand(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pand, 8, a, b) +#define orc_mmx_emit_paddusb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddusb, 8, a, b) +#define orc_mmx_emit_paddusw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddusw, 8, a, b) +#define orc_mmx_emit_pmaxub(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxub, 8, a, b) +#define orc_mmx_emit_pandn(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pandn, 8, a, b) +#define orc_mmx_emit_pavgb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pavgb, 8, a, b) +#define orc_mmx_emit_pavgw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pavgw, 8, a, b) +#define orc_mmx_emit_pmulhuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhuw, 8, a, b) +#define orc_mmx_emit_pmulhw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhw, 8, a, b) +#define orc_mmx_emit_psubsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubsb, 8, a, b) +#define orc_mmx_emit_psubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubsw, 8, a, b) +#define orc_mmx_emit_pminsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsw, 8, a, b) +#define orc_mmx_emit_por(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_por, 8, a, b) +#define orc_mmx_emit_paddsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddsb, 8, a, b) +#define orc_mmx_emit_paddsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddsw, 8, a, b) +#define orc_mmx_emit_pmaxsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsw, 8, a, b) +#define orc_mmx_emit_pxor(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pxor, 8, a, b) +#define orc_mmx_emit_pmuludq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmuludq, 8, a, b) +#define orc_mmx_emit_pmaddwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaddwd, 8, a, b) +#define orc_mmx_emit_psadbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psadbw, 8, a, b) +#define orc_mmx_emit_psubb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubb, 8, a, b) +#define 
orc_mmx_emit_psubw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubw, 8, a, b) +#define orc_mmx_emit_psubd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubd, 8, a, b) +#define orc_mmx_emit_psubq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubq, 8, a, b) +#define orc_mmx_emit_paddb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddb, 8, a, b) +#define orc_mmx_emit_paddw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddw, 8, a, b) +#define orc_mmx_emit_paddd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddd, 8, a, b) +#define orc_mmx_emit_pshufb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pshufb, 8, a, b) +#define orc_mmx_emit_phaddw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddw, 8, a, b) +#define orc_mmx_emit_phaddd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddd, 8, a, b) +#define orc_mmx_emit_phaddsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddsw, 8, a, b) +#define orc_mmx_emit_pmaddubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaddubsw, 8, a, b) +#define orc_mmx_emit_phsubw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubw, 8, a, b) +#define orc_mmx_emit_phsubd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubd, 8, a, b) +#define orc_mmx_emit_phsubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubsw, 8, a, b) +#define orc_mmx_emit_psignb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignb, 8, a, b) +#define orc_mmx_emit_psignw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignw, 8, a, b) +#define orc_mmx_emit_psignd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignd, 8, a, b) +#define orc_mmx_emit_pmulhrsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhrsw, 8, a, b) +#define orc_mmx_emit_pabsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsb, 8, a, b) +#define orc_mmx_emit_pabsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsw, 8, a, b) +#define orc_mmx_emit_pabsd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsd, 8, a, b) +#define orc_mmx_emit_pmovsxbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbw, 8, a, b) +#define 
orc_mmx_emit_pmovsxbd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbd, 8, a, b) +#define orc_mmx_emit_pmovsxbq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbq, 8, a, b) +#define orc_mmx_emit_pmovsxwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxwd, 8, a, b) +#define orc_mmx_emit_pmovsxwq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxwq, 8, a, b) +#define orc_mmx_emit_pmovsxdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxdq, 8, a, b) +#define orc_mmx_emit_pmuldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmuldq, 8, a, b) +#define orc_mmx_emit_pcmpeqq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqq, 8, a, b) +#define orc_mmx_emit_packusdw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packusdw, 8, a, b) +#define orc_mmx_emit_pmovzxbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbw, 8, a, b) +#define orc_mmx_emit_pmovzxbd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbd, 8, a, b) +#define orc_mmx_emit_pmovzxbq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbq, 8, a, b) +#define orc_mmx_emit_pmovzxwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxwd, 8, a, b) +#define orc_mmx_emit_pmovzxwq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxwq, 8, a, b) +#define orc_mmx_emit_pmovzxdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxdq, 8, a, b) +#define orc_mmx_emit_pmulld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulld, 8, a, b) +#define orc_mmx_emit_phminposuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phminposuw, 8, a, b) +#define orc_mmx_emit_pminsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsb, 8, a, b) +#define orc_mmx_emit_pminsd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsd, 8, a, b) +#define orc_mmx_emit_pminuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminuw, 8, a, b) +#define orc_mmx_emit_pminud(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminud, 8, a, b) +#define orc_mmx_emit_pmaxsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsb, 8, a, b) +#define orc_mmx_emit_pmaxsd(p,a,b) 
orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsd, 8, a, b) +#define orc_mmx_emit_pmaxuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxuw, 8, a, b) +#define orc_mmx_emit_pmaxud(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxud, 8, a, b) +#define orc_mmx_emit_pcmpgtq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtq, 8, a, b) +#define orc_mmx_emit_addps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_addps, 8, a, b) +#define orc_mmx_emit_subps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_subps, 8, a, b) +#define orc_mmx_emit_mulps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_mulps, 8, a, b) +#define orc_mmx_emit_divps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_divps, 8, a, b) +#define orc_mmx_emit_sqrtps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_sqrtps, 8, a, b) +#define orc_mmx_emit_addpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_addpd, 8, a, b) +#define orc_mmx_emit_subpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_subpd, 8, a, b) +#define orc_mmx_emit_mulpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_mulpd, 8, a, b) +#define orc_mmx_emit_divpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_divpd, 8, a, b) +#define orc_mmx_emit_sqrtpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_sqrtpd, 8, a, b) +#define orc_mmx_emit_cmpeqps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpeqps, 8, a, b) +#define orc_mmx_emit_cmpeqpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpeqpd, 8, a, b) +#define orc_mmx_emit_cmpltps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpltps, 8, a, b) +#define orc_mmx_emit_cmpltpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpltpd, 8, a, b) +#define orc_mmx_emit_cmpleps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpleps, 8, a, b) +#define orc_mmx_emit_cmplepd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmplepd, 8, a, b) +#define orc_mmx_emit_cvttps2dq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvttps2dq, 8, a, b) +#define orc_mmx_emit_cvttpd2dq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvttpd2dq, 8, a, b) +#define orc_mmx_emit_cvtdq2ps(p,a,b) 
orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtdq2ps, 8, a, b) +#define orc_mmx_emit_cvtdq2pd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtdq2pd, 8, a, b) +#define orc_mmx_emit_cvtps2pd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtps2pd, 8, a, b) +#define orc_mmx_emit_cvtpd2ps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtpd2ps, 8, a, b) +#define orc_mmx_emit_minps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_minps, 8, a, b) +#define orc_mmx_emit_minpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_minpd, 8, a, b) +#define orc_mmx_emit_maxps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_maxps, 8, a, b) +#define orc_mmx_emit_maxpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_maxpd, 8, a, b) +#define orc_mmx_emit_psraw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psraw_imm, imm, 0, b) +#define orc_mmx_emit_psrlw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrlw_imm, imm, 0, b) +#define orc_mmx_emit_psllw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psllw_imm, imm, 0, b) +#define orc_mmx_emit_psrad_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrad_imm, imm, 0, b) +#define orc_mmx_emit_psrld_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrld_imm, imm, 0, b) +#define orc_mmx_emit_pslld_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pslld_imm, imm, 0, b) +#define orc_mmx_emit_psrlq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrlq_imm, imm, 0, b) +#define orc_mmx_emit_psllq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psllq_imm, imm, 0, b) +#define orc_mmx_emit_psrldq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrldq_imm, imm, 0, b) +#define orc_mmx_emit_pslldq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pslldq_imm, imm, 0, b) +#define orc_mmx_emit_pshufd(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufd, imm, a, b) +#define orc_mmx_emit_pshuflw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshuflw, imm, a, b) +#define orc_mmx_emit_pshufhw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufhw, imm, a, b) +#define 
orc_mmx_emit_palignr(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_palignr, imm, a, b) + +#define orc_mmx_emit_pinsrw_memoffset(p,imm,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_pinsrw, 4, imm, offset, a, b) +#define orc_mmx_emit_movd_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movd_load, 4, 0, offset, a, b) +#define orc_mmx_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_mmx_load, 4, 0, offset, a, b) + +#define orc_mmx_emit_pextrw_memoffset(p,imm,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_pextrw, 8, imm, a, offset, b) +#define orc_mmx_emit_movd_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movd_store, 8, 0, a, offset, b) +#define orc_mmx_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_mmx_store, 8, 0, a, offset, b) + +#define orc_mmx_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, imm, offset, a, a_index, shift, b) +#define orc_mmx_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b) +#define orc_mmx_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_mmx_load, 4, 0, offset, a, a_index, shift, b) + +#define orc_mmx_emit_pextrw_memindex(p,imm,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_pextrw, imm, a, offset, b, b_index, shift) +#define orc_mmx_emit_movd_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movd_store, 0, a, offset, b, b_index, shift) +#define orc_mmx_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_mmx_store, 0, a, offset, b, b_index, shift) + +#define orc_mmx_emit_pinsrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pinsrw, imm, a, b)
+#define orc_mmx_emit_movd_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_load, 4, a, b) +#define orc_mmx_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_load, 4, a, b) + +#define orc_mmx_emit_pextrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pextrw, imm, a, b) +#define orc_mmx_emit_movd_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_store, 4, a, b) +#define orc_mmx_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_store, 4, a, b) + + +#define orc_mmx_emit_pshufw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufw, imm, a, b) +#define orc_mmx_emit_movq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_load, 8, a, b) + #endif -- 2.7.4