}
}
-void
-orc_mmx_emit_f20f (OrcCompiler *p, const char *insn_name, int code,
- int src, int dest)
-{
- ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
- orc_x86_get_regname_mmx(src),
- orc_x86_get_regname_mmx(dest));
- *p->codeptr++ = 0xf2;
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = code;
- orc_x86_emit_modrm_reg (p, src, dest);
-}
-
-void
-orc_mmx_emit_f30f (OrcCompiler *p, const char *insn_name, int code,
- int src, int dest)
-{
- ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
- orc_x86_get_regname_mmx(src),
- orc_x86_get_regname_mmx(dest));
- *p->codeptr++ = 0xf3;
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = code;
- orc_x86_emit_modrm_reg (p, src, dest);
-}
-
-void
-orc_mmx_emit_0f (OrcCompiler *p, const char *insn_name, int code,
- int src, int dest)
-{
- ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
- orc_x86_get_regname_mmx(src),
- orc_x86_get_regname_mmx(dest));
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = code;
- orc_x86_emit_modrm_reg (p, src, dest);
-}
-
-void
-orc_mmx_emit_660f (OrcCompiler *p, const char *insn_name, int code,
- int src, int dest)
-{
- ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
- orc_x86_get_regname_mmx(src),
- orc_x86_get_regname_mmx(dest));
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- if (code & 0xff00) {
- *p->codeptr++ = code >> 8;
- }
- *p->codeptr++ = code & 0xff;
- orc_x86_emit_modrm_reg (p, src, dest);
-}
-
-void
-orc_mmx_emit_pshufw (OrcCompiler *p, int shuf, int src, int dest)
-{
- ORC_ASM_CODE(p," pshufw $0x%04x, %%%s, %%%s\n", shuf,
- orc_x86_get_regname_mmx(src),
- orc_x86_get_regname_mmx(dest));
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x70;
- orc_x86_emit_modrm_reg (p, src, dest);
- *p->codeptr++ = shuf;
-}
-
-void
-orc_mmx_emit_pinsrw_memoffset (OrcCompiler *p, int imm, int offset,
- int src, int dest)
-{
- ORC_ASM_CODE(p," pinsrw $%d, %d(%%%s), %%%s\n", imm, offset,
- orc_x86_get_regname_ptr(p, src),
- orc_x86_get_regname_mmx(dest));
- orc_x86_emit_rex (p, 0, dest, 0, src);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0xc4;
- orc_x86_emit_modrm_memoffset (p, offset, src, dest);
- *p->codeptr++ = imm;
-
-}
-
-void
-orc_mmx_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src,
- int offset, int dest)
-{
- ORC_ASM_CODE(p," pextrw $%d, %%%s, %d(%%%s)\n", imm,
- orc_x86_get_regname_ptr(p, src),
- offset, orc_x86_get_regname_mmx(dest));
- orc_x86_emit_rex (p, 0, src, 0, dest);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0xc4;
- orc_x86_emit_modrm_memoffset (p, offset, dest, src);
- *p->codeptr++ = imm;
-}
-
-void
-orc_mmx_emit_shiftimm (OrcCompiler *p, const char *insn_name, int code,
- int modrm_code, int shift, int reg)
-{
- ORC_ASM_CODE(p," %s $%d, %%%s\n", insn_name, shift,
- orc_x86_get_regname_mmx(reg));
- orc_x86_emit_rex (p, 0, 0, 0, reg);
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = code;
- orc_x86_emit_modrm_reg (p, reg, modrm_code);
- *p->codeptr++ = shift;
-}
-
+#if 0
void
orc_x86_emit_mov_memindex_mmx (OrcCompiler *compiler, int size, int offset,
int reg1, int regindex, int shift, int reg2, int is_aligned)
orc_x86_emit_modrm_memoffset (compiler, offset, reg2, reg1);
}
+#endif
-void orc_x86_emit_mov_mmx_reg_reg (OrcCompiler *compiler, int reg1, int reg2)
+void /* Forwards (offset, reg1, reg2) to a movd/movq load_memoffset helper chosen by size. */
+orc_x86_emit_mov_memoffset_mmx (OrcCompiler *compiler, int size, int offset,
+ int reg1, int reg2, int is_aligned) /* is_aligned is not read in this body. NOTE(review): reg1 looks like the base pointer and reg2 the MMX destination - confirm against callers */
{
- ORC_ASM_CODE(compiler," movq %%%s, %%%s\n", orc_x86_get_regname_mmx(reg1),
- orc_x86_get_regname_mmx(reg2));
-
- orc_x86_emit_rex(compiler, 0, reg1, 0, reg2);
- *compiler->codeptr++ = 0x0f;
- *compiler->codeptr++ = 0x6f;
- orc_x86_emit_modrm_reg (compiler, reg1, reg2);
+ switch (size) { /* size selects which load helper is called */
+ case 4: /* size 4: delegate to the movd load helper */
+ orc_mmx_emit_movd_load_memoffset (compiler, offset, reg1, reg2);
+ break;
+ case 8: /* size 8: delegate to the movq load helper */
+ orc_mmx_emit_movq_load_memoffset (compiler, offset, reg1, reg2);
+ break;
+ default: /* any other size emits nothing here and is reported via ORC_COMPILER_ERROR */
+ ORC_COMPILER_ERROR(compiler, "bad size");
+ break;
+ }
}
-void orc_x86_emit_mov_reg_mmx (OrcCompiler *compiler, int reg1, int reg2)
+void /* Forwards (offset, reg1, regindex, shift, reg2) to a movd/movq load_memindex helper chosen by size. */
+orc_x86_emit_mov_memindex_mmx (OrcCompiler *compiler, int size, int offset,
+ int reg1, int regindex, int shift, int reg2, int is_aligned) /* is_aligned is not read in this body */
{
- ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", orc_x86_get_regname(reg1),
- orc_x86_get_regname_mmx(reg2));
- orc_x86_emit_rex(compiler, 0, reg2, 0, reg1);
- *compiler->codeptr++ = 0x0f;
- *compiler->codeptr++ = 0x6e;
- orc_x86_emit_modrm_reg (compiler, reg1, reg2);
+ switch (size) { /* size selects which indexed-load helper is called */
+ case 4: /* size 4: delegate to the movd indexed-load helper */
+ orc_mmx_emit_movd_load_memindex (compiler, offset,
+ reg1, regindex, shift, reg2);
+ break;
+ case 8: /* size 8: delegate to the movq indexed-load helper */
+ orc_mmx_emit_movq_load_memindex (compiler, offset,
+ reg1, regindex, shift, reg2);
+ break;
+ default: /* any other size emits nothing here and is reported via ORC_COMPILER_ERROR */
+ ORC_COMPILER_ERROR(compiler, "bad size");
+ break;
+ }
}
-void orc_x86_emit_mov_mmx_reg (OrcCompiler *compiler, int reg1, int reg2)
+void /* Forwards (offset, reg1, reg2) to a movd/movq store_memoffset helper chosen by size. */
+orc_x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1,
+ int offset, int reg2, int aligned, int uncached) /* aligned and uncached are not read in this body */
{
- ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", orc_x86_get_regname_mmx(reg1),
- orc_x86_get_regname(reg2));
- orc_x86_emit_rex(compiler, 0, reg1, 0, reg2);
- *compiler->codeptr++ = 0x0f;
- *compiler->codeptr++ = 0x7e;
- orc_x86_emit_modrm_reg (compiler, reg2, reg1);
+ switch (size) { /* size selects which store helper is called */
+ case 4: /* size 4: delegate to the movd store helper */
+ orc_mmx_emit_movd_store_memoffset (compiler, offset, reg1, reg2);
+ break;
+ case 8: /* size 8: delegate to the movq store helper */
+ orc_mmx_emit_movq_store_memoffset (compiler, offset, reg1, reg2);
+ break;
+ default: /* any other size emits nothing here and is reported via ORC_COMPILER_ERROR */
+ ORC_COMPILER_ERROR(compiler, "bad size");
+ break;
+ }
+
}
#define _ORC_MMX_H_
#include <orc/orcx86.h>
+#include <orc/orcx86insn.h>
ORC_BEGIN_DECLS
int reg1, int regindex, int shift, int reg2, int is_aligned);
void orc_x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1, int offset,
int reg2, int aligned, int uncached);
+#if 0
void orc_x86_emit_mov_mmx_reg_reg (OrcCompiler *compiler, int reg1, int reg2);
void orc_x86_emit_mov_reg_mmx (OrcCompiler *compiler, int reg1, int reg2);
void orc_x86_emit_mov_mmx_reg (OrcCompiler *compiler, int reg1, int reg2);
int offset, int dest);
void orc_mmx_emit_shiftimm (OrcCompiler *p, const char *insn_name,
int code, int modrm_code, int shift, int reg);
+#endif
unsigned int orc_mmx_get_cpu_flags (void);
void orc_mmx_load_constant (OrcCompiler *compiler, int reg, int size,
orc_uint64 value);
-/* MMX instructions */
-#define orc_mmx_emit_punpcklbw(p,a,b) orc_mmx_emit_660f (p, "punpcklbw", 0x60, a, b)
-#define orc_mmx_emit_punpcklwd(p,a,b) orc_mmx_emit_660f (p, "punpcklwd", 0x61, a, b)
-#define orc_mmx_emit_punpckldq(p,a,b) orc_mmx_emit_660f (p, "punpckldq", 0x62, a, b)
-#define orc_mmx_emit_packsswb(p,a,b) orc_mmx_emit_660f (p, "packsswb", 0x63, a, b)
-#define orc_mmx_emit_pcmpgtb(p,a,b) orc_mmx_emit_660f (p, "pcmpgtb", 0x64, a, b)
-#define orc_mmx_emit_pcmpgtw(p,a,b) orc_mmx_emit_660f (p, "pcmpgtw", 0x65, a, b)
-#define orc_mmx_emit_pcmpgtd(p,a,b) orc_mmx_emit_660f (p, "pcmpgtd", 0x66, a, b)
-#define orc_mmx_emit_packuswb(p,a,b) orc_mmx_emit_660f (p, "packuswb", 0x67, a, b)
-#define orc_mmx_emit_punpckhbw(p,a,b) orc_mmx_emit_660f (p, "punpckhbw", 0x68, a, b)
-#define orc_mmx_emit_punpckhwd(p,a,b) orc_mmx_emit_660f (p, "punpckhwd", 0x69, a, b)
-#define orc_mmx_emit_punpckhdq(p,a,b) orc_mmx_emit_660f (p, "punpckhdq", 0x6a, a, b)
-#define orc_mmx_emit_packssdw(p,a,b) orc_mmx_emit_660f (p, "packssdw", 0x6b, a, b)
-
-#define orc_mmx_emit_movq(p,a,b) orc_mmx_emit_660f (p, "movq", 0x6f, a, b)
-
-#define orc_mmx_emit_psraw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psraw", 0x71, 4, a, b)
-#define orc_mmx_emit_psrlw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrlw", 0x71, 2, a, b)
-#define orc_mmx_emit_psllw_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psllw", 0x71, 6, a, b)
-
-#define orc_mmx_emit_psrad_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrad", 0x72, 4, a, b)
-#define orc_mmx_emit_psrld_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrld", 0x72, 2, a, b)
-#define orc_mmx_emit_pslld_imm(p,a,b) orc_mmx_emit_shiftimm (p, "pslld", 0x72, 6, a, b)
-
-#define orc_mmx_emit_psrlq_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psrlq", 0x73, 2, a, b)
-#define orc_mmx_emit_psllq_imm(p,a,b) orc_mmx_emit_shiftimm (p, "psllq", 0x73, 6, a, b)
-
-#define orc_mmx_emit_pcmpeqb(p,a,b) orc_mmx_emit_660f (p, "pcmpeqb", 0x74, a, b)
-#define orc_mmx_emit_pcmpeqw(p,a,b) orc_mmx_emit_660f (p, "pcmpeqw", 0x75, a, b)
-#define orc_mmx_emit_pcmpeqd(p,a,b) orc_mmx_emit_660f (p, "pcmpeqd", 0x76, a, b)
-#define orc_mmx_emit_emms(p) orc_x86_emit_emms (p)
-
-#define orc_mmx_emit_pmullw(p,a,b) orc_mmx_emit_660f (p, "pmullw", 0xd5, a, b)
-
-#define orc_mmx_emit_psubusb(p,a,b) orc_mmx_emit_660f (p, "psubusb", 0xd8, a, b)
-#define orc_mmx_emit_psubusw(p,a,b) orc_mmx_emit_660f (p, "psubusw", 0xd9, a, b)
-#define orc_mmx_emit_pand(p,a,b) orc_mmx_emit_660f (p, "pand", 0xdb, a, b)
-#define orc_mmx_emit_paddusb(p,a,b) orc_mmx_emit_660f (p, "paddusb", 0xdc, a, b)
-#define orc_mmx_emit_paddusw(p,a,b) orc_mmx_emit_660f (p, "paddusw", 0xdd, a, b)
-#define orc_mmx_emit_pandn(p,a,b) orc_mmx_emit_660f (p, "pandn", 0xdf, a, b)
-
-#define orc_mmx_emit_pmulhw(p,a,b) orc_mmx_emit_660f (p, "pmulhw", 0xe5, a, b)
-
-#define orc_mmx_emit_psubsb(p,a,b) orc_mmx_emit_660f (p, "psubsb", 0xe8, a, b)
-#define orc_mmx_emit_psubsw(p,a,b) orc_mmx_emit_660f (p, "psubsw", 0xe9, a, b)
-#define orc_mmx_emit_por(p,a,b) orc_mmx_emit_660f (p, "por", 0xeb, a, b)
-#define orc_mmx_emit_paddsb(p,a,b) orc_mmx_emit_660f (p, "paddsb", 0xec, a, b)
-#define orc_mmx_emit_paddsw(p,a,b) orc_mmx_emit_660f (p, "paddsw", 0xed, a, b)
-#define orc_mmx_emit_pxor(p,a,b) orc_mmx_emit_660f (p, "pxor", 0xef, a, b)
-
-#define orc_mmx_emit_pmaddwd(p,a,b) orc_mmx_emit_660f (p, "pmaddwd", 0xf5, a, b)
-
-#define orc_mmx_emit_psubb(p,a,b) orc_mmx_emit_660f (p, "psubb", 0xf8, a, b)
-#define orc_mmx_emit_psubw(p,a,b) orc_mmx_emit_660f (p, "psubw", 0xf9, a, b)
-#define orc_mmx_emit_psubd(p,a,b) orc_mmx_emit_660f (p, "psubd", 0xfa, a, b)
-
-#define orc_mmx_emit_paddb(p,a,b) orc_mmx_emit_660f (p, "paddb", 0xfc, a, b)
-#define orc_mmx_emit_paddw(p,a,b) orc_mmx_emit_660f (p, "paddw", 0xfd, a, b)
-#define orc_mmx_emit_paddd(p,a,b) orc_mmx_emit_660f (p, "paddd", 0xfe, a, b)
-
-/* MMX EXT instructions */
-/* MMX SSE instructions */
-#define orc_mmx_emit_pinsrw(p,a,b) orc_mmx_emit_660f (p, "pinsrw", 0xc4, a, b)
-#define orc_mmx_emit_pextrw(p,a,b) orc_mmx_emit_660f (p, "pextrw", 0xc5, a, b)
-#define orc_mmx_emit_pminub(p,a,b) orc_mmx_emit_660f (p, "pminub", 0xda, a, b)
-#define orc_mmx_emit_pmaxub(p,a,b) orc_mmx_emit_660f (p, "pmaxub", 0xde, a, b)
-#define orc_mmx_emit_pavgb(p,a,b) orc_mmx_emit_660f (p, "pavgb", 0xe0, a, b)
-#define orc_mmx_emit_pavgw(p,a,b) orc_mmx_emit_660f (p, "pavgw", 0xe3, a, b)
-#define orc_mmx_emit_pmulhuw(p,a,b) orc_mmx_emit_660f (p, "pmulhuw", 0xe4, a, b)
-#define orc_mmx_emit_pminsw(p,a,b) orc_mmx_emit_660f (p, "pminsw", 0xea, a, b)
-#define orc_mmx_emit_pmaxsw(p,a,b) orc_mmx_emit_660f (p, "pmaxsw", 0xee, a, b)
-#define orc_mmx_emit_psadbw(p,a,b) orc_mmx_emit_660f (p, "psadbw", 0xf6, a, b)
-
-/* 3DNow! instrunctions, FIXME, not correct */
-#define orc_mmx_emit_pmulhrw(p,a,b) orc_mmx_emit_660f (p, "pmulhrw", 0xb7, a, b)
-#define orc_mmx_emit_pavgusb(p,a,b) orc_mmx_emit_660f (p, "pavgusb", 0xbf, a, b)
-
-/* MMX SSE2 instructions */
-#define orc_mmx_emit_paddq(p,a,b) orc_mmx_emit_660f (p, "paddq", 0xd4, a, b)
-#define orc_mmx_emit_psubq(p,a,b) orc_mmx_emit_660f (p, "psubq", 0xfb, a, b)
-#define orc_mmx_emit_pmuludq(p,a,b) orc_mmx_emit_660f (p, "pmuludq", 0xf4, a, b)
-
-/* MMX SSE3 instructions */
-
-/* MMX SSSE3 instructions */
-#define orc_mmx_emit_pshufb(p,a,b) orc_mmx_emit_660f (p, "pshufb", 0x3800, a, b)
-#define orc_mmx_emit_phaddw(p,a,b) orc_mmx_emit_660f (p, "phaddw", 0x3801, a, b)
-#define orc_mmx_emit_phaddd(p,a,b) orc_mmx_emit_660f (p, "phaddd", 0x3802, a, b)
-#define orc_mmx_emit_phaddsw(p,a,b) orc_mmx_emit_660f (p, "phaddsw", 0x3803, a, b)
-#define orc_mmx_emit_pmaddubsw(p,a,b) orc_mmx_emit_660f (p, "pmaddubsw", 0x3804, a, b)
-#define orc_mmx_emit_phsubw(p,a,b) orc_mmx_emit_660f (p, "phsubw", 0x3805, a, b)
-#define orc_mmx_emit_phsubd(p,a,b) orc_mmx_emit_660f (p, "phsubd", 0x3806, a, b)
-#define orc_mmx_emit_phsubsw(p,a,b) orc_mmx_emit_660f (p, "phsubsw", 0x3807, a, b)
-#define orc_mmx_emit_psignb(p,a,b) orc_mmx_emit_660f (p, "psignb", 0x3808, a, b)
-#define orc_mmx_emit_psignw(p,a,b) orc_mmx_emit_660f (p, "psignw", 0x3809, a, b)
-#define orc_mmx_emit_psignd(p,a,b) orc_mmx_emit_660f (p, "psignd", 0x380a, a, b)
-#define orc_mmx_emit_pmulhrsw(p,a,b) orc_mmx_emit_660f (p, "pmulhrsw", 0x380b, a, b)
-
-#define orc_mmx_emit_pabsb(p,a,b) orc_mmx_emit_660f (p, "pabsb", 0x381c, a, b)
-#define orc_mmx_emit_pabsw(p,a,b) orc_mmx_emit_660f (p, "pabsw", 0x381d, a, b)
-#define orc_mmx_emit_pabsd(p,a,b) orc_mmx_emit_660f (p, "pabsd", 0x381e, a, b)
-
-/* MMX SSE4.1 instructions */
-#define orc_mmx_emit_pmovsxbw(p,a,b) orc_mmx_emit_660f (p, "pmovsxbw", 0x3820, a, b)
-#define orc_mmx_emit_pmovsxbd(p,a,b) orc_mmx_emit_660f (p, "pmovsxbd", 0x3821, a, b)
-#define orc_mmx_emit_pmovsxbq(p,a,b) orc_mmx_emit_660f (p, "pmovsxbq", 0x3822, a, b)
-#define orc_mmx_emit_pmovsxwd(p,a,b) orc_mmx_emit_660f (p, "pmovsxwd", 0x3823, a, b)
-#define orc_mmx_emit_pmovsxwq(p,a,b) orc_mmx_emit_660f (p, "pmovsxwq", 0x3824, a, b)
-#define orc_mmx_emit_pmovsxdq(p,a,b) orc_mmx_emit_660f (p, "pmovsxdq", 0x3825, a, b)
-
-#define orc_mmx_emit_pmuldq(p,a,b) orc_mmx_emit_660f (p, "pmuldq", 0x3828, a, b)
-#define orc_mmx_emit_pcmpeqq(p,a,b) orc_mmx_emit_660f (p, "pcmpeqq", 0x3829, a, b)
-
-#define orc_mmx_emit_packusdw(p,a,b) orc_mmx_emit_660f (p, "packusdw", 0x382b, a, b)
-
-#define orc_mmx_emit_pmovzxbw(p,a,b) orc_mmx_emit_660f (p, "pmovzxbw", 0x3830, a, b)
-#define orc_mmx_emit_pmovzxbd(p,a,b) orc_mmx_emit_660f (p, "pmovzxbd", 0x3831, a, b)
-#define orc_mmx_emit_pmovzxbq(p,a,b) orc_mmx_emit_660f (p, "pmovzxbq", 0x3832, a, b)
-#define orc_mmx_emit_pmovzxwd(p,a,b) orc_mmx_emit_660f (p, "pmovzxwd", 0x3833, a, b)
-#define orc_mmx_emit_pmovzxwq(p,a,b) orc_mmx_emit_660f (p, "pmovzxwq", 0x3834, a, b)
-#define orc_mmx_emit_pmovzxdq(p,a,b) orc_mmx_emit_660f (p, "pmovzxdq", 0x3835, a, b)
-
-#define orc_mmx_emit_pmulld(p,a,b) orc_mmx_emit_660f (p, "pmuldq", 0x3840, a, b)
-#define orc_mmx_emit_phminposuw(p,a,b) orc_mmx_emit_660f (p, "phminposuw", 0x3841, a, b)
-
-#define orc_mmx_emit_pminsb(p,a,b) orc_mmx_emit_660f (p, "pminsb", 0x3838, a, b)
-#define orc_mmx_emit_pminsd(p,a,b) orc_mmx_emit_660f (p, "pminsd", 0x3839, a, b)
-#define orc_mmx_emit_pminuw(p,a,b) orc_mmx_emit_660f (p, "pminuw", 0x383a, a, b)
-#define orc_mmx_emit_pminud(p,a,b) orc_mmx_emit_660f (p, "pminud", 0x383b, a, b)
-#define orc_mmx_emit_pmaxsb(p,a,b) orc_mmx_emit_660f (p, "pmaxsb", 0x383c, a, b)
-#define orc_mmx_emit_pmaxsd(p,a,b) orc_mmx_emit_660f (p, "pmaxsd", 0x383d, a, b)
-#define orc_mmx_emit_pmaxuw(p,a,b) orc_mmx_emit_660f (p, "pmaxuw", 0x383e, a, b)
-#define orc_mmx_emit_pmaxud(p,a,b) orc_mmx_emit_660f (p, "pmaxud", 0x383f, a, b)
-
-/* SSE4.2 instructions */
-#define orc_mmx_emit_pcmpgtq(p,a,b) orc_mmx_emit_660f (p, "pcmpgtq", 0x3837, a, b)
-
#endif
ORC_END_DECLS
}
if (compiler->vars[i].size == 2) {
- orc_x86_emit_mov_mmx_reg (compiler, src, compiler->gp_tmpreg);
+ orc_mmx_emit_movd_store_register (compiler, src, compiler->gp_tmpreg);
orc_x86_emit_and_imm_reg (compiler, 4, 0xffff, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
(int)ORC_STRUCT_OFFSET(OrcExecutor, accumulators[i-ORC_VAR_A1]),
}
orc_x86_emit_mov_imm_reg (compiler, 4, value, compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, reg);
+ orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, reg);
#ifndef MMX
orc_mmx_emit_pshufd (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg);
#else
if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) &&
compiler->vars[insn->dest_args[0]].alloc !=
compiler->vars[insn->src_args[0]].alloc) {
- orc_x86_emit_mov_mmx_reg_reg (compiler,
+#ifdef MMX
+ orc_mmx_emit_movq (compiler,
compiler->vars[insn->src_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
+#else
+ orc_mmx_emit_movdqu (compiler,
+ compiler->vars[insn->src_args[0]].alloc,
+ compiler->vars[insn->dest_args[0]].alloc);
+#endif
}
rule->emit (compiler, rule->emit_user, insn);
} else {
if (!(insn->opcode->flags & (ORC_STATIC_OPCODE_ACCUMULATOR|ORC_STATIC_OPCODE_LOAD|ORC_STATIC_OPCODE_STORE)) &&
compiler->vars[insn->dest_args[0]].alloc !=
compiler->vars[insn->src_args[0]].alloc) {
+#ifdef MMX
+ orc_sse_emit_movq (compiler,
+ compiler->vars[insn->src_args[0]].alloc,
+ compiler->vars[insn->dest_args[0]].alloc);
+#else
orc_sse_emit_movdqu (compiler,
compiler->vars[insn->src_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
+#endif
}
rule->emit (compiler, rule->emit_user, insn);
} else {
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]),
compiler->exec_reg, reg, FALSE);
#ifndef MMX
- orc_x86_emit_movhps_memoffset_mmx (compiler,
+ orc_mmx_emit_movhps_load_memoffset (compiler,
(int)ORC_STRUCT_OFFSET(OrcExecutor,
params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]),
compiler->exec_reg, reg);
case 1:
orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg,
compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc);
+ orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc);
break;
case 2:
orc_mmx_emit_pxor (compiler, dest->alloc, dest->alloc);
case 1:
orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg,
compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc);
+ orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc);
break;
case 2:
orc_mmx_emit_pxor (compiler, dest->alloc, dest->alloc);
case 2:
orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg,
compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_mmx (compiler, compiler->gp_tmpreg, dest->alloc);
+ orc_mmx_emit_movd_load_register (compiler, compiler->gp_tmpreg, dest->alloc);
break;
case 4:
orc_mmx_emit_pinsrw_memoffset (compiler, 0, offset, ptr_reg, dest->alloc);
if (ptr_reg == compiler->gp_tmpreg) {
ORC_COMPILER_ERROR(compiler,"unimplemented");
}
- orc_x86_emit_mov_mmx_reg (compiler, src->alloc, compiler->gp_tmpreg);
+ orc_mmx_emit_movd_store_register (compiler, src->alloc, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg,
offset, ptr_reg);
break;
if (ptr_reg == compiler->gp_tmpreg) {
ORC_COMPILER_ERROR(compiler,"unimplemented");
}
- orc_x86_emit_mov_mmx_reg (compiler, src->alloc, compiler->gp_tmpreg);
+ orc_mmx_emit_movd_store_register (compiler, src->alloc, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg,
offset, ptr_reg);
}
int tmp2 = orc_compiler_get_temp_reg (compiler);
int tmpc;
- orc_x86_emit_mov_mmx_reg (compiler, X86_MM6, compiler->gp_tmpreg);
+ orc_mmx_emit_movd_store_register (compiler, X86_MM6, compiler->gp_tmpreg);
orc_x86_emit_sar_imm_reg (compiler, 4, 16, compiler->gp_tmpreg);
orc_mmx_emit_movdqu_load_memindex (compiler, 0, src->ptr_register,
orc_mmx_emit_pshufd (compiler, ORC_MMX_SHUF(3,2,3,2), tmp, tmp2);
orc_mmx_emit_psubw (compiler, tmp, tmp2);
- orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp);
+ orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp);
orc_mmx_emit_pshuflw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp);
orc_mmx_emit_psrlw_imm (compiler, 8, tmp);
orc_mmx_emit_pmullw (compiler, tmp2, tmp);
for(i=0;i<(1<<compiler->loop_shift);i+=2){
orc_x86_emit_mov_memoffset_mmx (compiler, 8, 0,
src->ptr_register, tmp, FALSE);
- orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp4);
+ orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp4);
if (compiler->vars[increment_var].vartype == ORC_VAR_TYPE_PARAM) {
orc_x86_emit_add_memoffset_reg (compiler, 4,
orc_mmx_emit_punpcklbw (compiler, zero, tmp2);
orc_mmx_emit_psubw (compiler, tmp, tmp2);
- orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp);
+ orc_mmx_emit_movd_load_register (compiler, src->ptr_offset, tmp);
orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp);
orc_mmx_emit_psrlw_imm (compiler, 8, tmp);
orc_mmx_emit_pmullw (compiler, tmp2, tmp);
tmpc = orc_compiler_get_temp_constant (p, 1<<type, 1);
if (src == dest) {
- orc_x86_emit_cpuinsn (p, opcodes[type], 0, src, tmpc);
+ orc_x86_emit_cpuinsn_size (p, opcodes[type], 16, src, tmpc);
orc_mmx_emit_movq (p, tmpc, dest);
} else {
/* FIXME this would be a good opportunity to not chain src to dest */
orc_mmx_emit_movq (p, tmpc, dest);
- orc_x86_emit_cpuinsn (p, opcodes[type], 0, src, dest);
+ orc_x86_emit_cpuinsn_size (p, opcodes[type], 16, src, dest);
}
}
#endif
}
-#ifdef MMX
-static void
-mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
-{
- int type = ORC_PTR_TO_INT(user);
- int imm_code1[] = { 0x71, 0x71, 0x71, 0x72, 0x72, 0x72, 0x73, 0x73 };
- int imm_code2[] = { 6, 2, 4, 6, 2, 4, 6, 2 };
- int reg_code[] = { 0xf1, 0xd1, 0xe1, 0xf2, 0xd2, 0xe2, 0xf3, 0xd3 };
- const char *code[] = { "psllw", "psrlw", "psraw", "pslld", "psrld", "psrad", "psllq", "psrlq" };
-
- if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type],
- p->vars[insn->src_args[1]].value.i,
- p->vars[insn->dest_args[0]].alloc);
- } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
- int tmp = orc_compiler_get_temp_reg (p);
-
- /* FIXME this is a gross hack to reload the register with a
- * 64-bit version of the parameter. */
- orc_x86_emit_mov_memoffset_mmx (p, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
- p->exec_reg, tmp, FALSE);
-
- orc_mmx_emit_660f (p, code[type], reg_code[type], tmp,
- p->vars[insn->dest_args[0]].alloc);
- } else {
- ORC_COMPILER_ERROR(p,"rule only works with constants or params");
- p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;
- }
-}
-#else
static void
mmx_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
{
ORC_X86_psrad_imm, ORC_X86_psllq_imm, ORC_X86_psrlq_imm };
if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_x86_emit_cpuinsn (p, opcodes_imm[type],
- p->vars[insn->src_args[1]].value.i, 0,
+ orc_x86_emit_cpuinsn_imm (p, opcodes_imm[type],
+ p->vars[insn->src_args[1]].value.i, 16,
p->vars[insn->dest_args[0]].alloc);
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
int tmp = orc_compiler_get_temp_reg (p);
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
p->exec_reg, tmp, FALSE);
- orc_x86_emit_cpuinsn (p, opcodes[type], 0, tmp,
+ orc_x86_emit_cpuinsn_size (p, opcodes[type], 16, tmp,
p->vars[insn->dest_args[0]].alloc);
} else {
ORC_COMPILER_ERROR(p,"rule only works with constants or params");
p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;
}
}
-#endif
static void
mmx_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn)
orc_mmx_emit_punpcklbw (compiler, zero, tmp2);
orc_mmx_emit_psubw (compiler, tmp, tmp2);
- orc_x86_emit_mov_reg_mmx (compiler, src->ptr_offset, tmp);
+ orc_sse_emit_movd_load_register (compiler, src->ptr_offset, tmp);
orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), tmp, tmp);
orc_mmx_emit_psrlw_imm (compiler, 8, tmp);
orc_mmx_emit_pmullw (compiler, tmp2, tmp);
}
-#ifdef MMX
-static void
-sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
-{
- int type = ORC_PTR_TO_INT(user);
- int imm_code1[] = { 0x71, 0x71, 0x71, 0x72, 0x72, 0x72, 0x73, 0x73 };
- int imm_code2[] = { 6, 2, 4, 6, 2, 4, 6, 2 };
- int reg_code[] = { 0xf1, 0xd1, 0xe1, 0xf2, 0xd2, 0xe2, 0xf3, 0xd3 };
- const char *code[] = { "psllw", "psrlw", "psraw", "pslld", "psrld", "psrad", "psllq", "psrlq" };
-
- if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_CONST) {
- orc_mmx_emit_shiftimm (p, code[type], imm_code1[type], imm_code2[type],
- p->vars[insn->src_args[1]].value.i,
- p->vars[insn->dest_args[0]].alloc);
- } else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
- int tmp = orc_compiler_get_temp_reg (p);
-
- /* FIXME this is a gross hack to reload the register with a
- * 64-bit version of the parameter. */
- orc_x86_emit_mov_memoffset_sse (p, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
- p->exec_reg, tmp, FALSE);
-
- orc_mmx_emit_660f (p, code[type], reg_code[type], tmp,
- p->vars[insn->dest_args[0]].alloc);
- } else {
- ORC_COMPILER_ERROR(p,"rule only works with constants or params");
- p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;
- }
-}
-#else
static void
sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
{
p->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;
}
}
-#endif
static void
sse_rule_shlb (OrcCompiler *p, void *user, OrcInstruction *insn)
#include <orc/orcprogram.h>
#include <orc/orcdebug.h>
#include <orc/orcsse.h>
+#include <orc/orcmmx.h>
#include <orc/orcx86insn.h>
/**
};
if (i>=X86_XMM0 && i<X86_XMM0 + 16) return x86_regs[i - X86_XMM0];
+ if (i>=X86_MM0 && i<X86_MM0 + 8) return "ERROR_MMX";
switch (i) {
case 0:
return "UNALLOCATED";
#ifdef ORC_ENABLE_UNSTABLE_API
typedef enum {
- X86_XMM0 = ORC_VEC_REG_BASE,
+ X86_XMM0 = ORC_VEC_REG_BASE + 16,
X86_XMM1,
X86_XMM2,
X86_XMM3,
static const OrcSysOpcode orc_x86_opcodes[] = {
- { "punpcklbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f60 },
- { "punpcklwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f61 },
- { "punpckldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f62 },
- { "packsswb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f63 },
- { "pcmpgtb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f64 },
- { "pcmpgtw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f65 },
- { "pcmpgtd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f66 },
- { "packuswb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f67 },
- { "punpckhbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f68 },
- { "punpckhwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f69 },
- { "punpckhdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6a },
- { "packssdw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6b },
- { "punpcklqdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6c },
- { "punpckhqdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6d },
- { "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6f },
- { "psraw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe1 },
- { "psrlw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd1 },
- { "psllw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff1 },
- { "psrad", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe2 },
- { "psrld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd2 },
- { "pslld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff2 },
- { "psrlq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd3 },
- { "psllq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff3 },
- { "psrldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f73 },
- { "pslldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f73 },
- { "psrlq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd3 },
- { "pcmpeqb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f74 },
- { "pcmpeqw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f75 },
- { "pcmpeqd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f76 },
- { "paddq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd4 },
- { "pmullw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd5 },
- { "psubusb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd8 },
- { "psubusw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fd9 },
- { "pminub", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fda },
- { "pand", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fdb },
- { "paddusb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fdc },
- { "paddusw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fdd },
- { "pmaxub", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fde },
- { "pandn", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fdf },
- { "pavgb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe0 },
- { "pavgw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe3 },
- { "pmulhuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe4 },
- { "pmulhw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe5 },
- { "psubsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe8 },
- { "psubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fe9 },
- { "pminsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fea },
- { "por", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0feb },
- { "paddsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fec },
- { "paddsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fed },
- { "pmaxsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fee },
- { "pxor", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0fef },
- { "pmuludq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff4 },
- { "pmaddwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff5 },
- { "psadbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff6 },
- { "psubb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff8 },
- { "psubw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ff9 },
- { "psubd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ffa },
- { "psubq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ffb },
- { "paddb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ffc },
- { "paddw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ffd },
- { "paddd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0ffe },
- { "pshufb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3800 },
- { "phaddw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3801 },
- { "phaddd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3802 },
- { "phaddsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3803 },
- { "pmaddubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3804 },
- { "phsubw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3805 },
- { "phsubd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3806 },
- { "phsubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3807 },
- { "psignb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3808 },
- { "psignw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3809 },
- { "psignd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f380a },
- { "pmulhrsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f380b },
- { "pabsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f381c },
- { "pabsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f381d },
- { "pabsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f381e },
- { "pmovsxbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3820 },
- { "pmovsxbd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3821 },
- { "pmovsxbq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3822 },
- { "pmovsxwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3823 },
- { "pmovsxwq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3824 },
- { "pmovsxdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3825 },
- { "pmuldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3828 },
- { "pcmpeqq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3829 },
- { "packusdw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f382b },
- { "pmovzxbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3830 },
- { "pmovzxbd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3831 },
- { "pmovzxbq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3832 },
- { "pmovzxwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3833 },
- { "pmovzxwq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3834 },
- { "pmovzxdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3835 },
- { "pmulld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3840 },
- { "phminposuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3841 },
- { "pminsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3838 },
- { "pminsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3839 },
- { "pminuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383a },
- { "pminud", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383b },
- { "pmaxsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383c },
- { "pmaxsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383d },
- { "pmaxuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383e },
- { "pmaxud", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f383f },
- { "pcmpgtq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f3837 },
+ { "punpcklbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f60 },
+ { "punpcklwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f61 },
+ { "punpckldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f62 },
+ { "packsswb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f63 },
+ { "pcmpgtb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f64 },
+ { "pcmpgtw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f65 },
+ { "pcmpgtd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f66 },
+ { "packuswb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f67 },
+ { "punpckhbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f68 },
+ { "punpckhwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f69 },
+ { "punpckhdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f6a },
+ { "packssdw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f6b },
+ { "punpcklqdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f6c },
+ { "punpckhqdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f6d },
+ { "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f6f },
+ { "psraw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe1 },
+ { "psrlw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd1 },
+ { "psllw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff1 },
+ { "psrad", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe2 },
+ { "psrld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd2 },
+ { "pslld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff2 },
+ { "psrlq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd3 },
+ { "psllq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff3 },
+ { "psrldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f73 },
+ { "pslldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f73 },
+ { "psrlq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd3 },
+ { "pcmpeqb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f74 },
+ { "pcmpeqw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f75 },
+ { "pcmpeqd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f76 },
+ { "paddq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd4 },
+ { "pmullw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd5 },
+ { "psubusb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd8 },
+ { "psubusw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fd9 },
+ { "pminub", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fda },
+ { "pand", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fdb },
+ { "paddusb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fdc },
+ { "paddusw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fdd },
+ { "pmaxub", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fde },
+ { "pandn", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fdf },
+ { "pavgb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe0 },
+ { "pavgw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe3 },
+ { "pmulhuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe4 },
+ { "pmulhw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe5 },
+ { "psubsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe8 },
+ { "psubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fe9 },
+ { "pminsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fea },
+ { "por", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0feb },
+ { "paddsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fec },
+ { "paddsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fed },
+ { "pmaxsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fee },
+ { "pxor", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0fef },
+ { "pmuludq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff4 },
+ { "pmaddwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff5 },
+ { "psadbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff6 },
+ { "psubb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff8 },
+ { "psubw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ff9 },
+ { "psubd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ffa },
+ { "psubq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ffb },
+ { "paddb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ffc },
+ { "paddw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ffd },
+ { "paddd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0ffe },
+ { "pshufb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3800 },
+ { "phaddw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3801 },
+ { "phaddd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3802 },
+ { "phaddsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3803 },
+ { "pmaddubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3804 },
+ { "phsubw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3805 },
+ { "phsubd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3806 },
+ { "phsubsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3807 },
+ { "psignb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3808 },
+ { "psignw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3809 },
+ { "psignd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f380a },
+ { "pmulhrsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f380b },
+ { "pabsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f381c },
+ { "pabsw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f381d },
+ { "pabsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f381e },
+ { "pmovsxbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3820 },
+ { "pmovsxbd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3821 },
+ { "pmovsxbq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3822 },
+ { "pmovsxwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3823 },
+ { "pmovsxwq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3824 },
+ { "pmovsxdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3825 },
+ { "pmuldq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3828 },
+ { "pcmpeqq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3829 },
+ { "packusdw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f382b },
+ { "pmovzxbw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3830 },
+ { "pmovzxbd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3831 },
+ { "pmovzxbq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3832 },
+ { "pmovzxwd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3833 },
+ { "pmovzxwq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3834 },
+ { "pmovzxdq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3835 },
+ { "pmulld", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3840 },
+ { "phminposuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3841 },
+ { "pminsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3838 },
+ { "pminsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3839 },
+ { "pminuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383a },
+ { "pminud", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383b },
+ { "pmaxsb", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383c },
+ { "pmaxsd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383d },
+ { "pmaxuw", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383e },
+ { "pmaxud", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f383f },
+ { "pcmpgtq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x01, 0x0f3837 },
{ "addps", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f58 },
{ "subps", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f5c },
{ "mulps", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f59 },
{ "minpd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f5d },
{ "maxps", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f5f },
{ "maxpd", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f5f },
- { "psraw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f71, 4 },
- { "psrlw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f71, 2 },
- { "psllw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f71, 6 },
- { "psrad", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f72, 4 },
- { "psrld", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f72, 2 },
- { "pslld", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f72, 6 },
- { "psrlq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f73, 2 },
- { "psllq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f73, 6 },
- { "psrldq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f73, 3 },
- { "pslldq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x66, 0x0f73, 7 },
+ { "psraw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f71, 4 },
+ { "psrlw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f71, 2 },
+ { "psllw", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f71, 6 },
+ { "psrad", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f72, 4 },
+ { "psrld", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f72, 2 },
+ { "pslld", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f72, 6 },
+ { "psrlq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f73, 2 },
+ { "psllq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f73, 6 },
+ { "psrldq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f73, 3 },
+ { "pslldq", ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT, 0, 0x01, 0x0f73, 7 },
{ "pshufd", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0x66, 0x0f70 },
{ "pshuflw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf2, 0x0f70 },
{ "pshufhw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0xf3, 0x0f70 },
{ "palignr", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0x66, 0x0f3a0f },
- { "pinsrw", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x66, 0x0fc4 },
- { "movd", ORC_X86_INSN_TYPE_REGM_MMX, 0, 0x66, 0x0f6e },
- { "movq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0xf3, 0x0f7e },
+ { "pinsrw", ORC_X86_INSN_TYPE_IMM8_REGM_MMX, 0, 0x01, 0x0fc4 },
+ { "movd", ORC_X86_INSN_TYPE_REGM_MMX, 0, 0x01, 0x0f6e },
+ { "movq", ORC_X86_INSN_TYPE_SSEM_SSE, 0, 0xf3, 0x0f7e },
{ "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x66, 0x0f6f },
{ "movdqu", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0xf3, 0x0f6f },
{ "movhps", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f16 },
- { "pextrw", ORC_X86_INSN_TYPE_IMM8_MMX_REG_REV, 0, 0x66, 0x0f3a15 },
- { "movd", ORC_X86_INSN_TYPE_MMX_REGM_REV, 0, 0x66, 0x0f7e },
- { "movq", ORC_X86_INSN_TYPE_MMXM_MMX_REV, 0, 0x66, 0x0fd6 },
+ { "pextrw", ORC_X86_INSN_TYPE_IMM8_MMX_REG_REV, 0, 0x01, 0x0f3a15 },
+ { "movd", ORC_X86_INSN_TYPE_MMX_REGM_REV, 0, 0x01, 0x0f7e },
+ { "movq", ORC_X86_INSN_TYPE_SSEM_SSE_REV, 0, 0x66, 0x0fd6 },
{ "movdqa", ORC_X86_INSN_TYPE_MMXM_MMX_REV, 0, 0x66, 0x0f7f },
{ "movdqu", ORC_X86_INSN_TYPE_MMXM_MMX_REV, 0, 0xf3, 0x0f7f },
{ "movntdq", ORC_X86_INSN_TYPE_MMXM_MMX_REV, 0, 0x66, 0x0fe7 },
{ "sar", ORC_X86_INSN_TYPE_REGM, 0, 0x00, 0xd1, 7 },
{ "and", ORC_X86_INSN_TYPE_IMM32_A, 0, 0x00, 0x25, 4 },
{ "", ORC_X86_INSN_TYPE_ALIGN, 0, 0x00, 0x00 },
+ { "pshufw", ORC_X86_INSN_TYPE_IMM8_MMXM_MMX, 0, 0x00, 0x0f70 },
+ { "movq", ORC_X86_INSN_TYPE_MMXM_MMX, 0, 0x00, 0x0f6f },
+ { "movq", ORC_X86_INSN_TYPE_MMXM_MMX_REV, 0, 0x00, 0x0f7f },
};
static void
output_opcode (OrcCompiler *p, const OrcSysOpcode *opcode, int size,
- int src, int dest)
+ int src, int dest, int is_sse)
{
ORC_ASSERT(opcode->code != 0);
if (opcode->prefix != 0) {
- *p->codeptr++ = opcode->prefix;
+ if (opcode->prefix == 0x01) {
+ if (is_sse) {
+ *p->codeptr++ = 0x66;
+ }
+ } else {
+ *p->codeptr++ = opcode->prefix;
+ }
}
orc_x86_emit_rex (p, size, dest, 0, src);
if (opcode->code & 0xff0000) {
*p->codeptr++ = (opcode->code >> 0) & 0xff;
}
+/*
+ * Return the printable name of vector register `reg`.
+ *
+ * A non-zero `is_sse` selects the SSE name lookup
+ * (orc_x86_get_regname_sse); zero selects the MMX name lookup
+ * (orc_x86_get_regname_mmx).  The returned string is whatever the
+ * selected lookup returns.
+ */
+const char *
+orc_x86_get_regname_mmxsse (int reg, int is_sse)
+{
+  if (!is_sse) {
+    return orc_x86_get_regname_mmx (reg);
+  }
+
+  return orc_x86_get_regname_sse (reg);
+}
+
+/*
+ * Report whether `reg` lies in the inclusive range
+ * X86_XMM0 .. X86_XMM15.  Returns 1 when it does, 0 otherwise.
+ *
+ * NOTE(review): this helper has external linkage and an unprefixed
+ * name; if it is only used inside this file it could be made static
+ * -- confirm against other callers.
+ */
+int
+is_sse_reg (int reg)
+{
+  if (reg < X86_XMM0) {
+    return 0;
+  }
+
+  return reg <= X86_XMM15;
+}
+
void
orc_x86_insn_output_asm (OrcCompiler *p, OrcX86Insn *xinsn)
{
char imm_str[40] = { 0 };
char op1_str[40] = { 0 };
char op2_str[40] = { 0 };
+ int is_sse;
if (xinsn->opcode->type == ORC_X86_INSN_TYPE_ALIGN) {
if (xinsn->size > 0) ORC_ASM_CODE(p,".p2align %d\n", xinsn->size);
return;
}
+ is_sse = FALSE;
+ if (is_sse_reg (xinsn->src) || is_sse_reg (xinsn->dest)) {
+ is_sse = TRUE;
+ }
+
switch (xinsn->opcode->type) {
case ORC_X86_INSN_TYPE_MMXM_MMX:
case ORC_X86_INSN_TYPE_SSEM_SSE:
case ORC_X86_INSN_TYPE_MMXM_MMX_REV:
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
case ORC_X86_INSN_TYPE_REGM_MMX:
case ORC_X86_INSN_TYPE_MMX_REGM_REV:
case ORC_X86_INSN_TYPE_REGM_REG:
case ORC_X86_INSN_TYPE_SSEM_SSE:
case ORC_X86_INSN_TYPE_IMM8_MMXM_MMX:
if (xinsn->type == ORC_X86_RM_REG) {
- sprintf(op1_str, "%%%s, ", orc_x86_get_regname_sse (xinsn->src));
+ sprintf(op1_str, "%%%s, ",
+ orc_x86_get_regname_mmxsse (xinsn->src, is_sse));
} else if (xinsn->type == ORC_X86_RM_MEMOFFSET) {
sprintf(op1_str, "%d(%%%s), ", xinsn->offset,
orc_x86_get_regname_ptr (p, xinsn->src));
}
break;
case ORC_X86_INSN_TYPE_MMXM_MMX_REV: /* FIXME misnamed */
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
case ORC_X86_INSN_TYPE_MMX_REGM_REV:
- sprintf(op1_str, "%%%s, ", orc_x86_get_regname_sse (xinsn->src));
+ sprintf(op1_str, "%%%s, ",
+ orc_x86_get_regname_mmxsse (xinsn->src, is_sse));
break;
case ORC_X86_INSN_TYPE_REGM_MMX:
case ORC_X86_INSN_TYPE_REGM_REG:
case ORC_X86_INSN_TYPE_IMM8_REGM_MMX:
case ORC_X86_INSN_TYPE_REGM_MMX:
case ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT:
- sprintf(op2_str, "%%%s", orc_x86_get_regname_sse (xinsn->dest));
+ sprintf(op2_str, "%%%s",
+ orc_x86_get_regname_mmxsse (xinsn->dest, is_sse));
break;
case ORC_X86_INSN_TYPE_MMXM_MMX_REV:
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
if (xinsn->type == ORC_X86_RM_REG) {
- sprintf(op2_str, "%%%s", orc_x86_get_regname_sse (xinsn->dest));
+ sprintf(op2_str, "%%%s",
+ orc_x86_get_regname_mmxsse (xinsn->dest, is_sse));
} else if (xinsn->type == ORC_X86_RM_MEMOFFSET) {
sprintf(op2_str, "%d(%%%s)", xinsn->offset,
orc_x86_get_regname_ptr (p, xinsn->dest));
void
orc_x86_insn_output_opcode (OrcCompiler *p, OrcX86Insn *xinsn)
{
+ int is_sse;
+
+ is_sse = FALSE;
+ if (is_sse_reg (xinsn->src) || is_sse_reg (xinsn->dest)) {
+ is_sse = TRUE;
+ }
+
switch (xinsn->opcode->type) {
case ORC_X86_INSN_TYPE_IMM8_MMX_SHIFT:
- output_opcode (p, xinsn->opcode, 4, xinsn->dest, 0);
+ output_opcode (p, xinsn->opcode, 4, xinsn->dest, 0, is_sse);
break;
case ORC_X86_INSN_TYPE_MMX_REGM_REV:
case ORC_X86_INSN_TYPE_MMXM_MMX_REV:
- output_opcode (p, xinsn->opcode, 4, xinsn->dest, xinsn->src);
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
+ output_opcode (p, xinsn->opcode, 4, xinsn->dest, xinsn->src, is_sse);
break;
case ORC_X86_INSN_TYPE_REG_REGM:
case ORC_X86_INSN_TYPE_IMM8_REGM:
case ORC_X86_INSN_TYPE_IMM32_REGM:
case ORC_X86_INSN_TYPE_REG8_REGM:
case ORC_X86_INSN_TYPE_REG16_REGM:
- output_opcode (p, xinsn->opcode, xinsn->size, xinsn->dest, xinsn->src);
+ output_opcode (p, xinsn->opcode, xinsn->size, xinsn->dest, xinsn->src, FALSE);
break;
case ORC_X86_INSN_TYPE_IMM8_MMXM_MMX:
case ORC_X86_INSN_TYPE_MMXM_MMX:
case ORC_X86_INSN_TYPE_SSEM_SSE:
case ORC_X86_INSN_TYPE_REGM_MMX:
- output_opcode (p, xinsn->opcode, 4, xinsn->src, xinsn->dest);
+ output_opcode (p, xinsn->opcode, 4, xinsn->src, xinsn->dest, is_sse);
break;
case ORC_X86_INSN_TYPE_IMM8_REGM_MMX:
case ORC_X86_INSN_TYPE_MEM:
case ORC_X86_INSN_TYPE_REGM_REG:
case ORC_X86_INSN_TYPE_STACK:
- output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest);
+ output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest, FALSE);
break;
case ORC_X86_INSN_TYPE_REGM:
- output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest);
+ output_opcode (p, xinsn->opcode, xinsn->size, xinsn->src, xinsn->dest, FALSE);
break;
case ORC_X86_INSN_TYPE_IMM32_REGM_MOV:
orc_x86_emit_rex (p, xinsn->size, 0, 0, xinsn->dest);
*p->codeptr++ = xinsn->opcode->code + (xinsn->dest&7);
break;
case ORC_X86_INSN_TYPE_NONE:
- output_opcode (p, xinsn->opcode, 4, 0, 0);
+ output_opcode (p, xinsn->opcode, 4, 0, 0, FALSE);
break;
case ORC_X86_INSN_TYPE_IMM32_A:
- output_opcode (p, xinsn->opcode, xinsn->size, 0, 0);
+ output_opcode (p, xinsn->opcode, xinsn->size, 0, 0, FALSE);
break;
case ORC_X86_INSN_TYPE_ALIGN:
{
break;
case ORC_X86_INSN_TYPE_REG_REGM:
case ORC_X86_INSN_TYPE_MMXM_MMX_REV:
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
case ORC_X86_INSN_TYPE_MMX_REGM_REV:
case ORC_X86_INSN_TYPE_REG8_REGM:
case ORC_X86_INSN_TYPE_REG16_REGM:
case ORC_X86_INSN_TYPE_MMXM_MMX:
case ORC_X86_INSN_TYPE_REG_REGM:
case ORC_X86_INSN_TYPE_MMXM_MMX_REV:
+ case ORC_X86_INSN_TYPE_SSEM_SSE_REV:
case ORC_X86_INSN_TYPE_MMX_REGM_REV:
case ORC_X86_INSN_TYPE_REG8_REGM:
case ORC_X86_INSN_TYPE_REG16_REGM:
ORC_X86_INSN_TYPE_IMM8_MMXM_MMX,
ORC_X86_INSN_TYPE_IMM8_MMX_REG_REV,
ORC_X86_INSN_TYPE_MMXM_MMX_REV,
+ ORC_X86_INSN_TYPE_SSEM_SSE_REV,
ORC_X86_INSN_TYPE_REGM_MMX,
ORC_X86_INSN_TYPE_MMX_REGM_REV,
ORC_X86_INSN_TYPE_REGM,
ORC_X86_palignr,
ORC_X86_pinsrw,
ORC_X86_movd_load,
- ORC_X86_movq_load,
+ ORC_X86_movq_sse_load,
ORC_X86_movdqa_load,
ORC_X86_movdqu_load,
ORC_X86_movhps_load,
ORC_X86_pextrw,
ORC_X86_movd_store,
- ORC_X86_movq_store,
+ ORC_X86_movq_sse_store,
ORC_X86_movdqa_store,
ORC_X86_movdqu_store,
ORC_X86_movntdq_store,
ORC_X86_sar,
ORC_X86_and_imm32_a,
ORC_X86_ALIGN,
+ ORC_X86_pshufw,
+ ORC_X86_movq_mmx_load,
+ ORC_X86_movq_mmx_store,
} OrcX86Opcode;
enum {
#define orc_sse_emit_pinsrw_memoffset(p,imm,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_pinsrw, 4, imm, offset, a, b)
#define orc_sse_emit_movd_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movd_load, 4, 0, offset, a, b)
-#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_load, 4, 0, offset, a, b)
+#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_sse_load, 4, 0, offset, a, b)
#define orc_sse_emit_movdqa_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movdqa_load, 4, 0, offset, a, b)
#define orc_sse_emit_movdqu_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movdqu_load, 4, 0, offset, a, b)
#define orc_sse_emit_movhps_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movhps_load, 4, 0, offset, a, b)
#define orc_sse_emit_pextrw_memoffset(p,imm,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_pextrw, 16, imm, a, offset, b)
#define orc_sse_emit_movd_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movd_store, 16, 0, a, offset, b)
-#define orc_sse_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_store, 16, 0, a, offset, b)
+#define orc_sse_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_sse_store, 16, 0, a, offset, b)
#define orc_sse_emit_movdqa_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqa_store, 16, 0, a, offset, b)
#define orc_sse_emit_movdqu_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movdqu_store, 16, 0, a, offset, b)
#define orc_sse_emit_movntdq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movntdq_store, 16, 0, a, offset, b)
#define orc_sse_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, imm, offset, a, a_index, shift, b)
#define orc_sse_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b)
-#define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_load, 4, 0, offset, a, a_index, shift, b)
+#define orc_sse_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_sse_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_movdqa_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqa_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_movdqu_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movdqu_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_movhps_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movhps_load, 4, 0, offset, a, a_index, shift, b)
#define orc_sse_emit_pextrw_memindex(p,imm,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_pextrw, imm, a, offset, b, b_index, shift)
#define orc_sse_emit_movd_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movd_store, 0, a, offset, b, b_index, shift)
-#define orc_sse_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_store, 0, a, offset, b, b_index, shift)
+#define orc_sse_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_sse_store, 0, a, offset, b, b_index, shift) /* movq_sse_* opcode; the orc_mmx_emit_movq_* macros use movq_mmx_* */
#define orc_sse_emit_movdqa_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movdqa_store, 0, a, offset, b, b_index, shift)
#define orc_sse_emit_movdqu_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movdqu_store, 0, a, offset, b, b_index, shift)
#define orc_sse_emit_movntdq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movntdq_store, 0, a, offset, b, b_index, shift)
#define orc_sse_emit_pinsrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pinsrw, imm, a, b)
#define orc_sse_emit_movd_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_load, 4, a, b)
-#define orc_sse_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_load, 4, a, b)
+#define orc_sse_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_sse_load, 4, a, b) /* movq_sse_* opcode; the MMX counterpart uses ORC_X86_movq_mmx_load */
#define orc_sse_emit_pextrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pextrw, imm, a, b)
#define orc_sse_emit_movd_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_store, 4, a, b)
-#define orc_sse_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_store, 4, a, b)
+#define orc_sse_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_sse_store, 4, a, b) /* movq_sse_* opcode; the MMX counterpart uses ORC_X86_movq_mmx_store */
+
+/* MMX two-register emitters: the orc_mmx_emit_<op>(p,a,b) macros from punpcklbw through maxpd forward to orc_x86_emit_cpuinsn_size() with 8 as the third argument.
+ * NOTE(review): some entries (e.g. punpcklqdq, the pmovsx and pmovzx families, addps, cvttps2dq) name instructions that are XMM-only in the Intel SDM;
+ * presumably this list mirrors the orc_sse_emit_ set mechanically -- confirm those wrappers are never used with MMX registers. */
+#define orc_mmx_emit_punpcklbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklbw, 8, a, b)
+#define orc_mmx_emit_punpcklwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklwd, 8, a, b)
+#define orc_mmx_emit_punpckldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckldq, 8, a, b)
+#define orc_mmx_emit_packsswb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packsswb, 8, a, b)
+#define orc_mmx_emit_pcmpgtb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtb, 8, a, b)
+#define orc_mmx_emit_pcmpgtw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtw, 8, a, b)
+#define orc_mmx_emit_pcmpgtd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtd, 8, a, b)
+#define orc_mmx_emit_packuswb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packuswb, 8, a, b)
+#define orc_mmx_emit_punpckhbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhbw, 8, a, b)
+#define orc_mmx_emit_punpckhwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhwd, 8, a, b)
+#define orc_mmx_emit_punpckhdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhdq, 8, a, b)
+#define orc_mmx_emit_packssdw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packssdw, 8, a, b)
+#define orc_mmx_emit_punpcklqdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpcklqdq, 8, a, b)
+#define orc_mmx_emit_punpckhqdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_punpckhqdq, 8, a, b)
+#define orc_mmx_emit_psraw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psraw, 8, a, b)
+#define orc_mmx_emit_psrlw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlw, 8, a, b)
+#define orc_mmx_emit_psllw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psllw, 8, a, b)
+#define orc_mmx_emit_psrad(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrad, 8, a, b)
+#define orc_mmx_emit_psrld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrld, 8, a, b)
+#define orc_mmx_emit_pslld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pslld, 8, a, b)
+#define orc_mmx_emit_psrlq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlq, 8, a, b)
+#define orc_mmx_emit_psllq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psllq, 8, a, b)
+#define orc_mmx_emit_psrldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrldq, 8, a, b)
+#define orc_mmx_emit_pslldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pslldq, 8, a, b)
+#define orc_mmx_emit_psrlq_reg(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psrlq_reg, 8, a, b)
+#define orc_mmx_emit_pcmpeqb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqb, 8, a, b)
+#define orc_mmx_emit_pcmpeqw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqw, 8, a, b)
+#define orc_mmx_emit_pcmpeqd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqd, 8, a, b)
+#define orc_mmx_emit_paddq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddq, 8, a, b)
+#define orc_mmx_emit_pmullw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmullw, 8, a, b)
+#define orc_mmx_emit_psubusb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubusb, 8, a, b)
+#define orc_mmx_emit_psubusw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubusw, 8, a, b)
+#define orc_mmx_emit_pminub(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminub, 8, a, b)
+#define orc_mmx_emit_pand(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pand, 8, a, b)
+#define orc_mmx_emit_paddusb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddusb, 8, a, b)
+#define orc_mmx_emit_paddusw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddusw, 8, a, b)
+#define orc_mmx_emit_pmaxub(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxub, 8, a, b)
+#define orc_mmx_emit_pandn(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pandn, 8, a, b)
+#define orc_mmx_emit_pavgb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pavgb, 8, a, b)
+#define orc_mmx_emit_pavgw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pavgw, 8, a, b)
+#define orc_mmx_emit_pmulhuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhuw, 8, a, b)
+#define orc_mmx_emit_pmulhw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhw, 8, a, b)
+#define orc_mmx_emit_psubsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubsb, 8, a, b)
+#define orc_mmx_emit_psubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubsw, 8, a, b)
+#define orc_mmx_emit_pminsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsw, 8, a, b)
+#define orc_mmx_emit_por(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_por, 8, a, b)
+#define orc_mmx_emit_paddsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddsb, 8, a, b)
+#define orc_mmx_emit_paddsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddsw, 8, a, b)
+#define orc_mmx_emit_pmaxsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsw, 8, a, b)
+#define orc_mmx_emit_pxor(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pxor, 8, a, b)
+#define orc_mmx_emit_pmuludq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmuludq, 8, a, b)
+#define orc_mmx_emit_pmaddwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaddwd, 8, a, b)
+#define orc_mmx_emit_psadbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psadbw, 8, a, b)
+#define orc_mmx_emit_psubb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubb, 8, a, b)
+#define orc_mmx_emit_psubw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubw, 8, a, b)
+#define orc_mmx_emit_psubd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubd, 8, a, b)
+#define orc_mmx_emit_psubq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psubq, 8, a, b)
+#define orc_mmx_emit_paddb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddb, 8, a, b)
+#define orc_mmx_emit_paddw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddw, 8, a, b)
+#define orc_mmx_emit_paddd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_paddd, 8, a, b)
+#define orc_mmx_emit_pshufb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pshufb, 8, a, b)
+#define orc_mmx_emit_phaddw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddw, 8, a, b)
+#define orc_mmx_emit_phaddd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddd, 8, a, b)
+#define orc_mmx_emit_phaddsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phaddsw, 8, a, b)
+#define orc_mmx_emit_pmaddubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaddubsw, 8, a, b)
+#define orc_mmx_emit_phsubw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubw, 8, a, b)
+#define orc_mmx_emit_phsubd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubd, 8, a, b)
+#define orc_mmx_emit_phsubsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phsubsw, 8, a, b)
+#define orc_mmx_emit_psignb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignb, 8, a, b)
+#define orc_mmx_emit_psignw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignw, 8, a, b)
+#define orc_mmx_emit_psignd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_psignd, 8, a, b)
+#define orc_mmx_emit_pmulhrsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulhrsw, 8, a, b)
+#define orc_mmx_emit_pabsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsb, 8, a, b)
+#define orc_mmx_emit_pabsw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsw, 8, a, b)
+#define orc_mmx_emit_pabsd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pabsd, 8, a, b)
+#define orc_mmx_emit_pmovsxbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbw, 8, a, b)
+#define orc_mmx_emit_pmovsxbd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbd, 8, a, b)
+#define orc_mmx_emit_pmovsxbq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxbq, 8, a, b)
+#define orc_mmx_emit_pmovsxwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxwd, 8, a, b)
+#define orc_mmx_emit_pmovsxwq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxwq, 8, a, b)
+#define orc_mmx_emit_pmovsxdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovsxdq, 8, a, b)
+#define orc_mmx_emit_pmuldq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmuldq, 8, a, b)
+#define orc_mmx_emit_pcmpeqq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpeqq, 8, a, b)
+#define orc_mmx_emit_packusdw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_packusdw, 8, a, b)
+#define orc_mmx_emit_pmovzxbw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbw, 8, a, b)
+#define orc_mmx_emit_pmovzxbd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbd, 8, a, b)
+#define orc_mmx_emit_pmovzxbq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxbq, 8, a, b)
+#define orc_mmx_emit_pmovzxwd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxwd, 8, a, b)
+#define orc_mmx_emit_pmovzxwq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxwq, 8, a, b)
+#define orc_mmx_emit_pmovzxdq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmovzxdq, 8, a, b)
+#define orc_mmx_emit_pmulld(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmulld, 8, a, b)
+#define orc_mmx_emit_phminposuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_phminposuw, 8, a, b)
+#define orc_mmx_emit_pminsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsb, 8, a, b)
+#define orc_mmx_emit_pminsd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminsd, 8, a, b)
+#define orc_mmx_emit_pminuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminuw, 8, a, b)
+#define orc_mmx_emit_pminud(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pminud, 8, a, b)
+#define orc_mmx_emit_pmaxsb(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsb, 8, a, b)
+#define orc_mmx_emit_pmaxsd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxsd, 8, a, b)
+#define orc_mmx_emit_pmaxuw(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxuw, 8, a, b)
+#define orc_mmx_emit_pmaxud(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pmaxud, 8, a, b)
+#define orc_mmx_emit_pcmpgtq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_pcmpgtq, 8, a, b)
+#define orc_mmx_emit_addps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_addps, 8, a, b)
+#define orc_mmx_emit_subps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_subps, 8, a, b)
+#define orc_mmx_emit_mulps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_mulps, 8, a, b)
+#define orc_mmx_emit_divps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_divps, 8, a, b)
+#define orc_mmx_emit_sqrtps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_sqrtps, 8, a, b)
+#define orc_mmx_emit_addpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_addpd, 8, a, b)
+#define orc_mmx_emit_subpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_subpd, 8, a, b)
+#define orc_mmx_emit_mulpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_mulpd, 8, a, b)
+#define orc_mmx_emit_divpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_divpd, 8, a, b)
+#define orc_mmx_emit_sqrtpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_sqrtpd, 8, a, b)
+#define orc_mmx_emit_cmpeqps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpeqps, 8, a, b)
+#define orc_mmx_emit_cmpeqpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpeqpd, 8, a, b)
+#define orc_mmx_emit_cmpltps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpltps, 8, a, b)
+#define orc_mmx_emit_cmpltpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpltpd, 8, a, b)
+#define orc_mmx_emit_cmpleps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmpleps, 8, a, b)
+#define orc_mmx_emit_cmplepd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cmplepd, 8, a, b)
+#define orc_mmx_emit_cvttps2dq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvttps2dq, 8, a, b)
+#define orc_mmx_emit_cvttpd2dq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvttpd2dq, 8, a, b)
+#define orc_mmx_emit_cvtdq2ps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtdq2ps, 8, a, b)
+#define orc_mmx_emit_cvtdq2pd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtdq2pd, 8, a, b)
+#define orc_mmx_emit_cvtps2pd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtps2pd, 8, a, b)
+#define orc_mmx_emit_cvtpd2ps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_cvtpd2ps, 8, a, b)
+#define orc_mmx_emit_minps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_minps, 8, a, b)
+#define orc_mmx_emit_minpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_minpd, 8, a, b)
+#define orc_mmx_emit_maxps(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_maxps, 8, a, b)
+#define orc_mmx_emit_maxpd(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_maxpd, 8, a, b)
+#define orc_mmx_emit_psraw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psraw_imm, imm, 0, b) /* the *_imm shift macros (through pslldq_imm) take no a and pass 0 in the slot where pshufd/pshuflw/pshufhw/palignr pass a */
+#define orc_mmx_emit_psrlw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrlw_imm, imm, 0, b)
+#define orc_mmx_emit_psllw_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psllw_imm, imm, 0, b)
+#define orc_mmx_emit_psrad_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrad_imm, imm, 0, b)
+#define orc_mmx_emit_psrld_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrld_imm, imm, 0, b)
+#define orc_mmx_emit_pslld_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pslld_imm, imm, 0, b)
+#define orc_mmx_emit_psrlq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrlq_imm, imm, 0, b)
+#define orc_mmx_emit_psllq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psllq_imm, imm, 0, b)
+#define orc_mmx_emit_psrldq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psrldq_imm, imm, 0, b)
+#define orc_mmx_emit_pslldq_imm(p,imm,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pslldq_imm, imm, 0, b)
+#define orc_mmx_emit_pshufd(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufd, imm, a, b)
+#define orc_mmx_emit_pshuflw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshuflw, imm, a, b)
+#define orc_mmx_emit_pshufhw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufhw, imm, a, b)
+#define orc_mmx_emit_palignr(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_psalignr, imm, a, b) /* NOTE(review): opcode constant is spelled ORC_X86_psalignr, not ORC_X86_palignr -- confirm it matches the ORC_X86_ enum */
+/* MMX memory-offset loads: forward (offset, a, b) to orc_x86_emit_cpuinsn_load_memoffset() with 4 after the opcode; pinsrw passes its imm, the others pass 0. */
+#define orc_mmx_emit_pinsrw_memoffset(p,imm,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_pinsrw, 4, imm, offset, a, b)
+#define orc_mmx_emit_movd_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movd_load, 4, 0, offset, a, b)
+#define orc_mmx_emit_movq_load_memoffset(p,offset,a,b) orc_x86_emit_cpuinsn_load_memoffset(p, ORC_X86_movq_mmx_load, 4, 0, offset, a, b)
+/* MMX memory-offset stores: forward (a, offset, b) to orc_x86_emit_cpuinsn_store_memoffset() with 8 after the opcode. NOTE(review): the orc_mmx_emit_*_load_memoffset macros pass 4 in that slot -- confirm the difference is intended. */
+#define orc_mmx_emit_pextrw_memoffset(p,imm,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_pextrw, 8, imm, a, offset, b)
+#define orc_mmx_emit_movd_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movd_store, 8, 0, a, offset, b)
+#define orc_mmx_emit_movq_store_memoffset(p,a,offset,b) orc_x86_emit_cpuinsn_store_memoffset(p, ORC_X86_movq_mmx_store, 8, 0, a, offset, b)
+/* MMX indexed loads: forward (offset, a, a_index, shift, b) to orc_x86_emit_cpuinsn_load_memindex() with 4 after the opcode; pinsrw passes its imm, the others pass 0. */
+#define orc_mmx_emit_pinsrw_memindex(p,imm,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_pinsrw, 4, imm, offset, a, a_index, shift, b)
+#define orc_mmx_emit_movd_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movd_load, 4, 0, offset, a, a_index, shift, b)
+#define orc_mmx_emit_movq_load_memindex(p,offset,a,a_index,shift,b) orc_x86_emit_cpuinsn_load_memindex(p, ORC_X86_movq_mmx_load, 4, 0, offset, a, a_index, shift, b)
+/* MMX indexed stores: forward (a, offset, b, b_index, shift) to orc_x86_emit_cpuinsn_store_memindex(); the argument after the opcode is the imm (0 for movd/movq), with no separate 4/8 argument as in the load forms. */
+#define orc_mmx_emit_pextrw_memindex(p,imm,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_pextrw, imm, a, offset, b, b_index, shift)
+#define orc_mmx_emit_movd_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movd_store, 0, a, offset, b, b_index, shift)
+#define orc_mmx_emit_movq_store_memindex(p,a,offset,b,b_index,shift) orc_x86_emit_cpuinsn_store_memindex(p, ORC_X86_movq_mmx_store, 0, a, offset, b, b_index, shift)
+/* Register-operand load forms: pinsrw goes through orc_x86_emit_cpuinsn_imm(); movd/movq go through orc_x86_emit_cpuinsn_size() with 4 as the third argument. */
+#define orc_mmx_emit_pinsrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pinsrw, imm, a, b)
+#define orc_mmx_emit_movd_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_load, 4, a, b)
+#define orc_mmx_emit_movq_load_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_load, 4, a, b)
+/* Register-operand store forms: pextrw goes through orc_x86_emit_cpuinsn_imm(); movd/movq go through orc_x86_emit_cpuinsn_size() with 4 as the third argument. */
+#define orc_mmx_emit_pextrw_register(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pextrw, imm, a, b)
+#define orc_mmx_emit_movd_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movd_store, 4, a, b)
+#define orc_mmx_emit_movq_store_register(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_store, 4, a, b)
+
+/* pshufw and movq between a and b. NOTE(review): movq reuses ORC_X86_movq_mmx_load with 8, while orc_mmx_emit_movq_load_register passes 4 for the same opcode -- confirm which value is intended. */
+#define orc_mmx_emit_pshufw(p,imm,a,b) orc_x86_emit_cpuinsn_imm(p, ORC_X86_pshufw, imm, a, b)
+#define orc_mmx_emit_movq(p,a,b) orc_x86_emit_cpuinsn_size(p, ORC_X86_movq_mmx_load, 8, a, b)
+
#endif