From d7ce7adbd21c84d8a4d233d3e6010bffe2f6efa3 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Wed, 15 Sep 2010 17:50:44 -0700 Subject: [PATCH] sse: implement 64-bit params and constants --- orc/orcprogram-sse.c | 30 ++++++++++++++++++++++- orc/orcrules-sse.c | 69 ++++++++++++++++++++++++++++++++++------------------ orc/orcsse.c | 13 ++++++++++ orc/orcsse.h | 5 +++- 4 files changed, 92 insertions(+), 25 deletions(-) diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 89b3bbd..f4d85cb 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -341,8 +341,36 @@ sse_save_accumulators (OrcCompiler *compiler) void sse_load_constant (OrcCompiler *compiler, int reg, int size, int value) { + orc_sse_load_constant (compiler, reg, size, value); +} + +void +orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, orc_uint64 value) +{ int i; + if (size == 8) { + int offset = ORC_STRUCT_OFFSET(OrcExecutor,arrays[ORC_VAR_T1]); + + /* FIXME how ugly and slow! */ + orc_x86_emit_mov_imm_reg (compiler, 4, value>>0, + compiler->gp_tmpreg); + orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, + offset + 0, compiler->exec_reg); + + orc_x86_emit_mov_imm_reg (compiler, 4, value>>32, + compiler->gp_tmpreg); + orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg, + offset + 4, compiler->exec_reg); + + orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, compiler->exec_reg, + reg, FALSE); +#ifndef MMX + orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg); +#endif + return; + } + if (size == 1) { value &= 0xff; value |= (value << 8); @@ -353,7 +381,7 @@ sse_load_constant (OrcCompiler *compiler, int reg, int size, int value) value |= (value << 16); } - ORC_ASM_CODE(compiler, "# loading constant %d 0x%08x\n", value, value); + ORC_ASM_CODE(compiler, "# loading constant %d 0x%08x\n", (int)value, (int)value); if (value == 0) { orc_sse_emit_pxor(compiler, reg, reg); return; diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index ef0830c..04a3e31 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -28,32 +28,55 @@ sse_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn) reg = dest->alloc; if (size == 8 && src->size == 8) { - ORC_COMPILER_ERROR(compiler,"64-bit parameters not implemented"); - } - - orc_x86_emit_mov_memoffset_sse (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), - compiler->exec_reg, reg, FALSE); - if (size == 1) { - orc_sse_emit_punpcklbw (compiler, reg, reg); - } + orc_x86_emit_mov_memoffset_sse (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), + compiler->exec_reg, reg, FALSE); + if (0) { + /* FIXME yes, I understand this is terrible */ + orc_sse_emit_pinsrw_memoffset (compiler, 2, + (int)ORC_STRUCT_OFFSET(OrcExecutor, + params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]) + 0, + compiler->exec_reg, reg); + orc_sse_emit_pinsrw_memoffset (compiler, 3, + (int)ORC_STRUCT_OFFSET(OrcExecutor, + params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]) + 1, + compiler->exec_reg, reg); + orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg); + } else { + orc_x86_emit_movhps_memoffset_sse (compiler, + (int)ORC_STRUCT_OFFSET(OrcExecutor, + params[insn->src_args[0] + (ORC_VAR_T1 - ORC_VAR_P1)]), + compiler->exec_reg, reg); + orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(2,0,2,0), reg, reg); + } + } else { + orc_x86_emit_mov_memoffset_sse (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[0]]), + compiler->exec_reg, reg, FALSE); + if (size < 8) { + if (size == 1) { + orc_sse_emit_punpcklbw (compiler, reg, reg); + } #ifndef MMX - if (size <= 2) { - orc_sse_emit_pshuflw (compiler, 0, reg, reg); - } - orc_sse_emit_pshufd (compiler, 0, reg, reg); + if (size <= 2) { + orc_sse_emit_pshuflw (compiler, 0, reg, reg); + } + orc_sse_emit_pshufd (compiler, 0, reg, reg); #else - if (size <= 2) { - orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg); - } else { - orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg); - } + if (size <= 2) { + orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(0,0,0,0), reg, reg); + } else { + orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,0,1,0), reg, reg); + } #endif - } else if (src->vartype == ORC_VAR_TYPE_CONST) { - if (size == 8 && src->size == 8) { - ORC_COMPILER_ERROR(compiler,"64-bit constants not implemented"); + } else { +#ifndef MMX + orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,0,1,0), reg, reg); +#endif + } } - sse_load_constant (compiler, dest->alloc, size, src->value.i); + } else if (src->vartype == ORC_VAR_TYPE_CONST) { + orc_sse_load_constant (compiler, dest->alloc, size, src->value.i); } else { ORC_ASSERT(0); } @@ -1200,7 +1223,7 @@ sse_rule_divluw (OrcCompiler *p, void *user, OrcInstruction *insn) orc_sse_emit_psllw (p, 8, divisor); orc_sse_emit_psrlw (p, 1, divisor); - sse_load_constant (p, a, 2, 0x00ff); + orc_sse_load_constant (p, a, 2, 0x00ff); tmp = orc_compiler_get_constant (p, 2, 0x8000); orc_sse_emit_movdqa (p, tmp, j); orc_sse_emit_psrlw (p, 8, j); diff --git a/orc/orcsse.c b/orc/orcsse.c index f812075..6e932e4 100644 --- a/orc/orcsse.c +++ b/orc/orcsse.c @@ -244,6 +244,19 @@ orc_x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, } void +orc_x86_emit_movhps_memoffset_sse (OrcCompiler *compiler, int offset, + int reg1, int reg2) +{ + ORC_ASM_CODE(compiler," movhps %d(%%%s), %%%s\n", offset, + orc_x86_get_regname_ptr(compiler, reg1), + orc_x86_get_regname_sse(reg2)); + orc_x86_emit_rex(compiler, 0, reg2, 0, reg1); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0x16; + orc_x86_emit_modrm_memoffset (compiler, reg2, offset, reg1); +} + +void orc_x86_emit_mov_memindex_sse (OrcCompiler *compiler, int size, int offset, int reg1, int regindex, int shift, int reg2, int is_aligned) { diff --git a/orc/orcsse.h b/orc/orcsse.h index e662824..6b58e7f 100644 --- a/orc/orcsse.h +++ b/orc/orcsse.h @@ -41,6 +41,8 @@ typedef enum { const char * orc_x86_get_regname_sse(int i); void orc_x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, int reg1, int reg2, int is_aligned); +void orc_x86_emit_movhps_memoffset_sse (OrcCompiler *compiler, int offset, + int reg1, int reg2); void orc_x86_emit_mov_memindex_sse (OrcCompiler *compiler, int size, int offset, int reg1, int regindex, int shift, int reg2, int is_aligned); void orc_x86_emit_mov_sse_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, @@ -76,7 +78,8 @@ void orc_sse_emit_shiftimm (OrcCompiler *p, const char *insn_name, void orc_sse_set_mxcsr (OrcCompiler *compiler); void orc_sse_restore_mxcsr (OrcCompiler *compiler); -void sse_load_constant (OrcCompiler *compiler, int reg, int size, int value); +void orc_sse_load_constant (OrcCompiler *compiler, int reg, int size, + orc_uint64 value); unsigned int orc_sse_get_cpu_flags (void); -- 2.7.4