x86_get_regname_sse(src),
x86_get_regname_sse(dest));
*p->codeptr++ = 0xf2;
- x86_emit_rex (p, 0, src, 0, dest);
+ x86_emit_rex (p, 0, dest, 0, src);
*p->codeptr++ = 0x0f;
*p->codeptr++ = code;
x86_emit_modrm_reg (p, src, dest);
x86_get_regname_sse(src),
x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
- x86_emit_rex (p, 0, src, 0, dest);
+ x86_emit_rex (p, 0, dest, 0, src);
*p->codeptr++ = 0x0f;
*p->codeptr++ = code;
x86_emit_modrm_reg (p, src, dest);
x86_get_regname_sse(src),
x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
- x86_emit_rex (p, 0, src, 0, dest);
+ x86_emit_rex (p, 0, dest, 0, src);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x38;
*p->codeptr++ = code;
if (value == 0) {
ORC_ASM_CODE(p," pxor %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xef;
x86_emit_modrm_reg (p, reg, reg);
ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, X86_ECX);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x6e;
x86_emit_modrm_reg (p, X86_ECX, reg);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
void
sse_emit_loadpb (OrcCompiler *p, int reg, int param)
{
- ORC_ASM_CODE(p," movd %d(%%%s), %%%s\n",
+ x86_emit_mov_memoffset_sse (p, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]),
- x86_get_regname_ptr(x86_exec_ptr),
- x86_get_regname_sse(reg));
- *p->codeptr++ = 0x66;
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x6e;
- x86_emit_modrm_memoffset (p, reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), x86_exec_ptr);
+ x86_exec_ptr, reg, FALSE);
sse_emit_660f (p, "punpcklbw", 0x60, reg, reg);
ORC_ASM_CODE(p," pshuflw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0xf2;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
void
sse_emit_loadpw (OrcCompiler *p, int reg, int param)
{
- ORC_ASM_CODE(p," movd %d(%%%s), %%%s\n",
+ x86_emit_mov_memoffset_sse (p, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]),
- x86_get_regname_ptr(x86_exec_ptr),
- x86_get_regname_sse(reg));
- *p->codeptr++ = 0x66;
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x6e;
- x86_emit_modrm_memoffset (p, reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), x86_exec_ptr);
+ x86_exec_ptr, reg, FALSE);
ORC_ASM_CODE(p," pshuflw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0xf2;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
void
sse_emit_loadpl (OrcCompiler *p, int reg, int param)
{
- ORC_ASM_CODE(p," movd %d(%%%s), %%%s\n",
+ x86_emit_mov_memoffset_sse (p, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]),
- x86_get_regname_ptr(x86_exec_ptr),
- x86_get_regname_sse(reg));
- *p->codeptr++ = 0x66;
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x6e;
- x86_emit_modrm_memoffset (p, reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), x86_exec_ptr);
+ x86_exec_ptr, reg, FALSE);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
void
sse_emit_loadpq (OrcCompiler *p, int reg, int param)
{
- ORC_ASM_CODE(p," movq %d(%%%s), %%%s\n",
+ x86_emit_mov_memoffset_sse (p, 8,
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]),
- x86_get_regname_ptr(x86_exec_ptr),
- x86_get_regname_sse(reg));
- *p->codeptr++ = 0xf3;
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x7e;
- x86_emit_modrm_memoffset (p, reg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]), x86_exec_ptr);
+ x86_exec_ptr, reg, FALSE);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, reg, 0, reg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
x86_get_regname_sse(src), x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
- x86_emit_rex (p, 0, src, 0, dest);
+ x86_emit_rex (p, 0, dest, 0, src);
*p->codeptr++ = 0x0f;
if (code & 0xff00) {
*p->codeptr++ = code >> 8;
#if 0
ORC_ASM_CODE(p," movd %%%s, %%ecx\n", x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, dest, 0, X86_ECX);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x7e;
x86_emit_modrm_reg (p, X86_ECX, dest);
if (p->loop_shift > 3) {
ORC_ASM_CODE(p," psrldq $8, %%%s\n", x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, dest);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x73;
x86_emit_modrm_reg (p, dest, 3);
ORC_ASM_CODE(p," movd %%%s, %%ecx\n", x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, dest, 0, X86_ECX);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x7e;
x86_emit_modrm_reg (p, X86_ECX, dest);
src = p->tmpreg;
}
- x86_emit_mov_imm_reg (p, 4, imm_vals[((int)user)], X86_ECX);
+ x86_emit_mov_imm_reg (p, 4, imm_vals[ORC_PTR_TO_INT(user)], X86_ECX);
ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, dest, 0, X86_ECX);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x6e;
x86_emit_modrm_reg (p, X86_ECX, dest);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(dest),
x86_get_regname_sse(dest));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, dest, 0, dest);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, dest, dest);
*p->codeptr++ = 0x00;
- sse_emit_660f38 (p, names[((int)user)], codes[((int)user)], src, dest);
+ sse_emit_660f38 (p, names[ORC_PTR_TO_INT(user)], codes[ORC_PTR_TO_INT(user)], src, dest);
}
static void
sse_rule_shift (OrcCompiler *p, void *user, OrcInstruction *insn)
{
- int type = (int)user;
+ int type = ORC_PTR_TO_INT(user);
int imm_code1[] = { 0x71, 0x71, 0x71, 0x72, 0x72, 0x72 };
int imm_code2[] = { 6, 2, 4, 6, 2, 4 };
int reg_code[] = { 0xf1, 0xd1, 0xe1, 0xf2, 0xd2, 0xe2 };
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = imm_code1[type];
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, imm_code2[type]);
} else if (p->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
/* FIXME this is a gross hack to reload the register with a
* 64-bit version of the parameter. */
- ORC_ASM_CODE(p," movd %d(%%%s), %%%s\n",
+ x86_emit_mov_memoffset_sse (p, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
- x86_get_regname_ptr(x86_exec_ptr),
- x86_get_regname_sse(p->tmpreg));
- *p->codeptr++ = 0x66;
- *p->codeptr++ = 0x0f;
- *p->codeptr++ = 0x6e;
- x86_emit_modrm_memoffset (p,
- p->tmpreg,
- (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]),
- x86_exec_ptr);
+ x86_exec_ptr, p->tmpreg, FALSE);
ORC_ASM_CODE(p," %s %%%s, %%%s\n", code[type],
x86_get_regname_sse(p->tmpreg),
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, p->vars[insn->dest_args[0]].alloc, 0, p->tmpreg);
*p->codeptr++ = 0x0f;
*p->codeptr++ = reg_code[type];
- x86_emit_modrm_reg (p, p->tmpreg,
- p->vars[insn->dest_args[0]].alloc);
+ x86_emit_modrm_reg (p, p->tmpreg, p->vars[insn->dest_args[0]].alloc);
} else {
ORC_PROGRAM_ERROR(p,"rule only works with constants or params");
}
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 4);
ORC_ASM_CODE(p," psrlw $8, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 2);
ORC_ASM_CODE(p," psllw $8, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 6);
ORC_ASM_CODE(p," psrlw $8, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 2);
ORC_ASM_CODE(p," psrad $16, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x72;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 4);
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x72;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 2);
ORC_ASM_CODE(p," pslld $16, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x72;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 6);
ORC_ASM_CODE(p," psrad $16, %%%s\n",
x86_get_regname_sse(p->vars[insn->dest_args[0]].alloc));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, 0, 0, p->vars[insn->dest_args[0]].alloc);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x72;
x86_emit_modrm_reg (p, p->vars[insn->dest_args[0]].alloc, 4);
ORC_ASM_CODE(p," movd %%ecx, %%%s\n", x86_get_regname_sse(tmp));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, tmp, 0, X86_ECX);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x6e;
x86_emit_modrm_reg (p, X86_ECX, tmp);
ORC_ASM_CODE(p," pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(tmp),
x86_get_regname_sse(tmp));
*p->codeptr++ = 0x66;
+ x86_emit_rex (p, 0, tmp, 0, tmp);
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, tmp, tmp);
/* SSE 3 -- no rules */
+if (0) {
/* SSSE 3 */
rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target);
REG(absb);
REG(absw);
REG(absl);
+}
if (0) {
/* SSE 4.1 */
if (x86_64) {
if (size >= 8) rex |= 0x08;
- if (reg1 == 1 || (x86_get_regnum(reg1)>=8)) rex |= 0x4;
- if (reg2 == 1 || (x86_get_regnum(reg2)>=8)) rex |= 0x2;
- if (reg3 == 1 || (x86_get_regnum(reg3)>=8)) rex |= 0x1;
+ if (reg1 & 8) rex |= 0x4;
+ if (reg2 & 8) rex |= 0x2;
+ if (reg3 & 8) rex |= 0x1;
if (rex != 0x40) *compiler->codeptr++ = rex;
}
switch (size) {
case 1:
- ORC_ASM_CODE(compiler," movb %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+ ORC_ASM_CODE(compiler," movb %d(%%%s), %%%s\n", offset,
+ x86_get_regname_ptr(reg1),
x86_get_regname_8(reg2));
+ x86_emit_rex(compiler, size, reg2, 0, reg1);
*compiler->codeptr++ = 0x8a;
x86_emit_modrm_memoffset (compiler, reg2, offset, reg1);
return;
ORC_ASM_CODE(compiler," movq %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
x86_get_regname_sse(reg2));
*compiler->codeptr++ = 0xf3;
+ x86_emit_rex(compiler, 0, reg2, 0, reg1);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0x7e;
break;
case 1:
ORC_ASM_CODE(compiler," movb %%%s, %d(%%%s)\n", x86_get_regname_8(reg1), offset,
x86_get_regname_ptr(reg2));
+ x86_emit_rex(compiler, size, reg1, 0, reg2);
*compiler->codeptr++ = 0x88;
x86_emit_modrm_memoffset (compiler, reg1, offset, reg2);
return;
ORC_ASM_CODE(compiler," movntdq %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
x86_get_regname_ptr(reg2));
*compiler->codeptr++ = 0x66;
+ x86_emit_rex(compiler, 0, reg1, 0, reg2);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0xe7;
} else {
ORC_ASM_CODE(compiler," movdqa %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
x86_get_regname_ptr(reg2));
*compiler->codeptr++ = 0x66;
+ x86_emit_rex(compiler, 0, reg1, 0, reg2);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0x7f;
}
ORC_ASM_CODE(compiler," movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
x86_get_regname_ptr(reg2));
*compiler->codeptr++ = 0xf3;
+ x86_emit_rex(compiler, 0, reg1, 0, reg2);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0x7f;
}
void x86_emit_mov_reg_sse (OrcCompiler *compiler, int reg1, int reg2)
{
- /* FIXME */
ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", x86_get_regname(reg1),
x86_get_regname_sse(reg2));
*compiler->codeptr++ = 0x66;
- x86_emit_rex(compiler, 0, reg1, 0, reg2);
+ x86_emit_rex(compiler, 0, reg2, 0, reg1);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0x6e;
x86_emit_modrm_reg (compiler, reg1, reg2);
void x86_emit_mov_sse_reg (OrcCompiler *compiler, int reg1, int reg2)
{
- /* FIXME */
ORC_ASM_CODE(compiler," movd %%%s, %%%s\n", x86_get_regname_sse(reg1),
x86_get_regname(reg2));
*compiler->codeptr++ = 0x66;
- x86_emit_rex(compiler, 0, reg2, 0, reg1);
+ x86_emit_rex(compiler, 0, reg1, 0, reg2);
*compiler->codeptr++ = 0x0f;
*compiler->codeptr++ = 0x7e;
x86_emit_modrm_reg (compiler, reg2, reg1);