From dfeaefab66b89b4084c8d793bafa04b589634f34 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Tue, 31 Mar 2009 22:11:46 -0700 Subject: [PATCH] Allow reading from dest --- orc/orccompiler.c | 4 ++++ orc/orcprogram-sse.c | 10 +++++++--- orc/x86.c | 23 ++++++++++++++++------- orc/x86.h | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/orc/orccompiler.c b/orc/orccompiler.c index 512d7d8..6db5493 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -149,9 +149,11 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) if (opcode->src_size[k] == 0) continue; var = insn->src_args[k]; +#if 0 if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) { ORC_PROGRAM_ERROR(compiler, "using dest var as source"); } +#endif actual_var = var; if (compiler->vars[var].replaced) { @@ -194,9 +196,11 @@ orc_compiler_rewrite_vars (OrcCompiler *compiler) compiler->vars[actual_var].used = TRUE; compiler->vars[actual_var].first_use = j; } else { +#if 0 if (compiler->vars[var].vartype == ORC_VAR_TYPE_DEST) { ORC_PROGRAM_ERROR(compiler,"writing dest more than once"); } +#endif if (compiler->vars[var].vartype == ORC_VAR_TYPE_TEMP) { actual_var = orc_compiler_dup_temporary (compiler, var, j); compiler->vars[var].replaced = TRUE; diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 9af1047..1db7892 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -164,13 +164,16 @@ sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var) x86_emit_mov_reg_sse (compiler, X86_ECX, var->alloc); break; case 4: - x86_emit_mov_memoffset_sse (compiler, 4, 0, ptr_reg, var->alloc); + x86_emit_mov_memoffset_sse (compiler, 4, 0, ptr_reg, var->alloc, + var->is_aligned); break; case 8: - x86_emit_mov_memoffset_sse (compiler, 8, 0, ptr_reg, var->alloc); + x86_emit_mov_memoffset_sse (compiler, 8, 0, ptr_reg, var->alloc, + var->is_aligned); break; case 16: - x86_emit_mov_memoffset_sse (compiler, 16, 0, ptr_reg, var->alloc); + x86_emit_mov_memoffset_sse (compiler, 16, 0, ptr_reg, var->alloc, + var->is_aligned); break; default: ORC_PROGRAM_ERROR(compiler,"bad load size %d", @@ -405,6 +408,7 @@ sse_emit_loop (OrcCompiler *compiler) switch (var->vartype) { case ORC_VAR_TYPE_SRC: + case ORC_VAR_TYPE_DEST: sse_emit_load_src (compiler, var); break; case ORC_VAR_TYPE_CONST: diff --git a/orc/x86.c b/orc/x86.c index 5f2855b..bda38d7 100644 --- a/orc/x86.c +++ b/orc/x86.c @@ -287,7 +287,7 @@ x86_emit_mov_memoffset_mmx (OrcCompiler *compiler, int size, int offset, void x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, - int reg1, int reg2) + int reg1, int reg2, int is_aligned) { switch (size) { case 4: @@ -306,12 +306,21 @@ x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, *compiler->codeptr++ = 0x7e; break; case 16: - ORC_ASM_CODE(compiler," movdqu %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), - x86_get_regname_sse(reg2)); - *compiler->codeptr++ = 0xf3; - x86_emit_rex(compiler, 0, reg2, 0, reg1); - *compiler->codeptr++ = 0x0f; - *compiler->codeptr++ = 0x6f; + if (is_aligned) { + ORC_ASM_CODE(compiler," movdqa %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg2)); + *compiler->codeptr++ = 0x66; + x86_emit_rex(compiler, 0, reg2, 0, reg1); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0x6f; + } else { + ORC_ASM_CODE(compiler," movdqu %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg2)); + *compiler->codeptr++ = 0xf3; + x86_emit_rex(compiler, 0, reg2, 0, reg1); + *compiler->codeptr++ = 0x0f; + *compiler->codeptr++ = 0x6f; + } break; default: ORC_PROGRAM_ERROR(compiler, "bad size"); diff --git a/orc/x86.h b/orc/x86.h index f67fbae..1685f18 100644 --- a/orc/x86.h +++ b/orc/x86.h @@ -14,7 +14,7 @@ void x86_emit_mov_memoffset_reg (OrcCompiler *compiler, int size, int offset, in void x86_emit_mov_memoffset_mmx (OrcCompiler *compiler, int size, int offset, int reg1, int reg2); void x86_emit_mov_memoffset_sse (OrcCompiler *compiler, int size, int offset, - int reg1, int reg2); + int reg1, int reg2, int is_aligned); void x86_emit_mov_reg_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, int reg2); void x86_emit_mov_mmx_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, int reg2); -- 2.7.4