From: David Schleef Date: Sun, 31 Oct 2010 12:38:52 +0000 (+0100) Subject: sse: more conversion to sysinsn X-Git-Tag: orc-0.4.12~49 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e0b8eaec872013a680cff3326fca4125a2faa22c;p=platform%2Fupstream%2Forc.git sse: more conversion to sysinsn --- diff --git a/orc/orcsse.c b/orc/orcsse.c index 902316b..d60a8a0 100644 --- a/orc/orcsse.c +++ b/orc/orcsse.c @@ -39,6 +39,7 @@ orc_x86_get_regname_sse(int i) } +#if 0 void orc_sse_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src, int offset, int dest) @@ -54,6 +55,7 @@ orc_sse_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src, orc_x86_emit_modrm_memoffset (p, src, offset, dest); *p->codeptr++ = imm; } +#endif void orc_x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, @@ -142,45 +144,20 @@ orc_x86_emit_mov_sse_memoffset (OrcCompiler *compiler, int size, int reg1, int o { switch (size) { case 4: - ORC_ASM_CODE(compiler," movd %%%s, %d(%%%s)\n", orc_x86_get_regname_sse(reg1), offset, - orc_x86_get_regname_ptr(compiler, reg2)); - *compiler->codeptr++ = 0x66; - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x7e; + orc_sse_emit_movd_store_memoffset (compiler, offset, reg1, reg2); break; case 8: - ORC_ASM_CODE(compiler," movq %%%s, %d(%%%s)\n", orc_x86_get_regname_sse(reg1), offset, - orc_x86_get_regname_ptr(compiler, reg2)); - *compiler->codeptr++ = 0x66; - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0xd6; + orc_sse_emit_movq_store_memoffset (compiler, offset, reg1, reg2); break; case 16: if (aligned) { if (uncached) { - ORC_ASM_CODE(compiler," movntdq %%%s, %d(%%%s)\n", orc_x86_get_regname_sse(reg1), offset, - orc_x86_get_regname_ptr(compiler, reg2)); - *compiler->codeptr++ = 0x66; - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0xe7; + orc_sse_emit_movntdq_store_memoffset (compiler, offset, reg1, reg2); } else { - ORC_ASM_CODE(compiler," movdqa %%%s, %d(%%%s)\n", orc_x86_get_regname_sse(reg1), offset, - orc_x86_get_regname_ptr(compiler, reg2)); - *compiler->codeptr++ = 0x66; - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x7f; + orc_sse_emit_movdqa_store_memoffset (compiler, offset, reg1, reg2); } } else { - ORC_ASM_CODE(compiler," movdqu %%%s, %d(%%%s)\n", orc_x86_get_regname_sse(reg1), offset, - orc_x86_get_regname_ptr(compiler, reg2)); - *compiler->codeptr++ = 0xf3; - orc_x86_emit_rex(compiler, 0, reg1, 0, reg2); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x7f; + orc_sse_emit_movdqu_store_memoffset (compiler, offset, reg1, reg2); } break; default: @@ -188,7 +165,6 @@ orc_x86_emit_mov_sse_memoffset (OrcCompiler *compiler, int size, int reg1, int o break; } - orc_x86_emit_modrm_memoffset (compiler, reg1, offset, reg2); } void orc_x86_emit_mov_sse_reg_reg (OrcCompiler *compiler, int reg1, int reg2) @@ -197,6 +173,8 @@ void orc_x86_emit_mov_sse_reg_reg (OrcCompiler *compiler, int reg1, int reg2) return; } + orc_sse_emit_movdqu (compiler, offset, reg1, reg2); +#if 0 ORC_ASM_CODE(compiler," movdqa %%%s, %%%s\n", orc_x86_get_regname_sse(reg1), orc_x86_get_regname_sse(reg2)); @@ -205,6 +183,7 @@ void orc_x86_emit_mov_sse_reg_reg (OrcCompiler *compiler, int reg1, int reg2) *compiler->codeptr++ = 0x0f; *compiler->codeptr++ = 0x6f; orc_x86_emit_modrm_reg (compiler, reg1, reg2); +#endif } void orc_x86_emit_mov_reg_sse (OrcCompiler *compiler, int reg1, int reg2) diff --git a/orc/orcsse.h b/orc/orcsse.h index 370c3c9..def5688 100644 --- a/orc/orcsse.h +++ b/orc/orcsse.h @@ -59,7 +59,6 @@ void orc_sse_emit_loadpw (OrcCompiler *p, int reg, int value); void orc_sse_emit_loadpl (OrcCompiler *p, int reg, int value); void orc_sse_emit_loadpq (OrcCompiler *p, int reg, int value); -void orc_sse_emit_pextrw_memoffset (OrcCompiler *p, int imm, int src, int offset, int dest); void orc_sse_set_mxcsr (OrcCompiler *compiler); void orc_sse_restore_mxcsr (OrcCompiler *compiler); @@ -69,8 +68,10 @@ void orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, void orc_sse_emit_sysinsn (OrcCompiler *p, int opcode, int src, int dest, int imm); -void orc_sse_emit_sysinsn_memoffset (OrcCompiler *p, int index, int offset, +void orc_sse_emit_sysinsn_load_memoffset (OrcCompiler *p, int index, int offset, int src, int dest, int imm); +void orc_sse_emit_sysinsn_store_memoffset (OrcCompiler *p, int index, int src, + int offset, int dest, int imm); unsigned int orc_sse_get_cpu_flags (void); diff --git a/orc/orcx86insn.c b/orc/orcx86insn.c index baac64b..9fc6573 100644 --- a/orc/orcx86insn.c +++ b/orc/orcx86insn.c @@ -157,6 +157,12 @@ static const OrcSysOpcode orc_x86_opcodes[] = { { "movdqa", ORC_X86_INSN_TYPE_SD_REV, 0, 0x660f6f }, { "movdqu", ORC_X86_INSN_TYPE_SD_REV, 0, 0xf30f6f }, { "movhps", ORC_X86_INSN_TYPE_SD_REV, 0, 0x0f16 }, + { "pextrw", ORC_X86_INSN_TYPE_SDI, 0, 0x660f3a15 }, + { "movd", ORC_X86_INSN_TYPE_SD, 0, 0x660f7e }, + { "movq", ORC_X86_INSN_TYPE_SD, 0, 0x660fd6 }, + { "movdqa", ORC_X86_INSN_TYPE_SD, 0, 0x660f7f }, + { "movdqu", ORC_X86_INSN_TYPE_SD, 0, 0xf30f7f }, + { "movntdq", ORC_X86_INSN_TYPE_SD, 0, 0x660fe7 }, }; @@ -169,6 +175,7 @@ orc_sse_emit_sysinsn (OrcCompiler *p, int index, int imm, int src, int dest) switch (opcode->type) { case ORC_X86_INSN_TYPE_SD: case ORC_X86_INSN_TYPE_SD2: + case ORC_X86_INSN_TYPE_SD_REV: ORC_ASM_CODE(p," %s %%%s, %%%s\n", opcode->name, orc_x86_get_regname_sse(src), orc_x86_get_regname_sse(dest)); @@ -208,6 +215,7 @@ orc_sse_emit_sysinsn (OrcCompiler *p, int index, int imm, int src, int dest) switch (opcode->type) { case ORC_X86_INSN_TYPE_SD: + case ORC_X86_INSN_TYPE_SD_REV: orc_x86_emit_modrm_reg (p, src, dest); break; case ORC_X86_INSN_TYPE_SHIFTIMM: @@ -230,7 +238,7 @@ orc_sse_emit_sysinsn (OrcCompiler *p, int index, int imm, int src, int dest) } void -orc_sse_emit_sysinsn_memoffset (OrcCompiler *p, int index, int imm, int offset, +orc_sse_emit_sysinsn_load_memoffset (OrcCompiler *p, int index, int imm, int offset, int src, int dest) { const OrcSysOpcode *opcode = orc_x86_opcodes + index; @@ -298,3 +306,74 @@ orc_sse_emit_sysinsn_memoffset (OrcCompiler *p, int index, int imm, int offset, } } +void +orc_sse_emit_sysinsn_store_memoffset (OrcCompiler *p, int index, int imm, int offset, + int src, int dest) +{ + const OrcSysOpcode *opcode = orc_x86_opcodes + index; + + switch (opcode->type) { + case ORC_X86_INSN_TYPE_SD: + case ORC_X86_INSN_TYPE_SD_REV: + case ORC_X86_INSN_TYPE_SD2: + ORC_ASM_CODE(p," %s %%%s, %d(%%%s)\n", opcode->name, + orc_x86_get_regname_sse(src), + offset, + orc_x86_get_regname_ptr(p, dest)); + break; + case ORC_X86_INSN_TYPE_SDI: + case ORC_X86_INSN_TYPE_SDI_REV: + ORC_ASM_CODE(p," %s $%d, %%%s, %d(%%%s)\n", opcode->name, + imm, orc_x86_get_regname_sse(src), + offset, + orc_x86_get_regname_ptr(p, dest)); + break; + default: + ORC_ASSERT(0); + break; + } + + if (opcode->code & 0xff000000) { + *p->codeptr++ = (opcode->code >> 24) & 0xff; + orc_x86_emit_rex (p, 0, dest, 0, src); + *p->codeptr++ = (opcode->code >> 16) & 0xff; + *p->codeptr++ = (opcode->code >> 8) & 0xff; + *p->codeptr++ = (opcode->code >> 0) & 0xff; + } else if (opcode->code & 0xff0000) { + *p->codeptr++ = (opcode->code >> 16) & 0xff; + orc_x86_emit_rex (p, 0, dest, 0, src); + *p->codeptr++ = (opcode->code >> 8) & 0xff; + *p->codeptr++ = (opcode->code >> 0) & 0xff; + } else { + *p->codeptr++ = (opcode->code >> 8) & 0xff; + orc_x86_emit_rex (p, 0, dest, 0, src); + *p->codeptr++ = (opcode->code >> 0) & 0xff; + } + + switch (opcode->type) { + case ORC_X86_INSN_TYPE_SD: + orc_x86_emit_modrm_memoffset (p, src, offset, dest); + break; + case ORC_X86_INSN_TYPE_SDI: + orc_x86_emit_modrm_memoffset (p, src, offset, dest); + *p->codeptr++ = imm; + break; +#if 0 + case ORC_X86_INSN_TYPE_SDI_REV: + orc_x86_emit_modrm_memoffset (p, dest, offset, src); + *p->codeptr++ = imm; + break; + case ORC_X86_INSN_TYPE_SD_REV: + orc_x86_emit_modrm_memoffset (p, dest, offset, src); + break; + case ORC_X86_INSN_TYPE_SD2: + orc_x86_emit_modrm_memoffset (p, src, offset, dest); + *p->codeptr++ = opcode->code2; + break; +#endif + default: + ORC_ASSERT(0); + break; + } +} + diff --git a/orc/orcx86insn.h b/orc/orcx86insn.h index 189b200..812031e 100644 --- a/orc/orcx86insn.h +++ b/orc/orcx86insn.h @@ -160,6 +160,12 @@ enum { ORC_X86_movdqa_load, ORC_X86_movdqu_load, ORC_X86_movhps_load, + ORC_X86_pextrw, + ORC_X86_movd_store, + ORC_X86_movq_store, + ORC_X86_movdqa_store, + ORC_X86_movdqu_store, + ORC_X86_movntdq_store, }; @@ -306,13 +312,21 @@ enum { #define orc_sse_emit_pshuflw(p,imm,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pshuflw, imm, a, b) #define orc_sse_emit_pshufhw(p,imm,a,b) orc_sse_emit_sysinsn(p, ORC_X86_pshufhw, imm, a, b) #define orc_sse_emit_palignr(p,imm,a,b) orc_sse_emit_sysinsn(p, ORC_X86_psalignr, imm, a, b) +#define orc_sse_emit_movdqu(p,offset,a,b) orc_sse_emit_sysinsn(p, ORC_X86_movdqu_load, 0, a, b) -#define orc_sse_emit_pinsrw_memoffset(p,imm,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_pinsrw, imm, offset, a, b) -#define orc_sse_emit_movd_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_movd_load, 0, offset, a, b) -#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_movq_load, 0, offset, a, b) -#define orc_sse_emit_movdqa_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_movdqa_load, 0, offset, a, b) -#define orc_sse_emit_movdqu_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_movdqu_load, 0, offset, a, b) -#define orc_sse_emit_movhps_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_memoffset(p, ORC_X86_movhps_load, 0, offset, a, b) +#define orc_sse_emit_pinsrw_memoffset(p,imm,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_pinsrw, imm, offset, a, b) +#define orc_sse_emit_movd_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_movd_load, 0, offset, a, b) +#define orc_sse_emit_movq_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_movq_load, 0, offset, a, b) +#define orc_sse_emit_movdqa_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_movdqa_load, 0, offset, a, b) +#define orc_sse_emit_movdqu_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_movdqu_load, 0, offset, a, b) +#define orc_sse_emit_movhps_load_memoffset(p,offset,a,b) orc_sse_emit_sysinsn_load_memoffset(p, ORC_X86_movhps_load, 0, offset, a, b) + +#define orc_sse_emit_pextrw_memoffset(p,imm,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_pextrw, imm, a, offset, b) +#define orc_sse_emit_movd_store_memoffset(p,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_movd_store, 0, a, offset, b) +#define orc_sse_emit_movq_store_memoffset(p,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_movq_store, 0, a, offset, b) +#define orc_sse_emit_movdqa_store_memoffset(p,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_movdqa_store, 0, a, offset, b) +#define orc_sse_emit_movdqu_store_memoffset(p,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_movdqu_store, 0, a, offset, b) +#define orc_sse_emit_movntdq_store_memoffset(p,a,offset,b) orc_sse_emit_sysinsn_store_memoffset(p, ORC_X86_movntdq_store, 0, a, offset, b) #endif