From d91e34d459c65c5603ec1293078a6a6400d4a455 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Mon, 15 Jun 2009 11:25:20 -0700 Subject: [PATCH] sse: improve alignment intro and outtro --- orc/orcprogram-sse.c | 50 ++++++++++++++++++++++++++------------------------ orc/orcx86.c | 28 ++++++++++++++++++++++++++++ orc/orcx86.h | 2 ++ 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 76a9ebe..d98dcbc 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -458,20 +458,23 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) if (compiler->loop_shift > 0) { int save_loop_shift; - - orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); - orc_x86_emit_je (compiler, 1); + int l; save_loop_shift = compiler->loop_shift; - compiler->loop_shift = 0; + compiler->vars[align_var].is_aligned = FALSE; + + for (l=0;l<save_loop_shift;l++) { + compiler->loop_shift = l; + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); + + orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); + orc_x86_emit_je (compiler, 12 + compiler->loop_shift); + orc_sse_emit_loop (compiler); + orc_x86_emit_label (compiler, 12 + compiler->loop_shift); + } - orc_x86_emit_label (compiler, 0); - orc_sse_emit_loop (compiler); - orc_x86_emit_dec_memoffset (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), - compiler->exec_reg); - orc_x86_emit_jne (compiler, 0); + compiler->loop_shift = save_loop_shift; compiler->loop_shift = save_loop_shift; compiler->vars[align_var].is_aligned = TRUE; @@ -483,6 +486,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); orc_x86_emit_je (compiler, 3); + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); orc_x86_emit_align (compiler); orc_x86_emit_label (compiler, 2); orc_sse_emit_loop (compiler); @@ 
-494,23 +498,21 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) if (compiler->loop_shift > 0) { int save_loop_shift; - - compiler->vars[align_var].is_aligned = FALSE; - orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); - orc_x86_emit_je (compiler, 5); + int l; save_loop_shift = compiler->loop_shift; - compiler->loop_shift = 0; + compiler->vars[align_var].is_aligned = FALSE; - orc_x86_emit_label (compiler, 4); - orc_sse_emit_loop (compiler); - orc_x86_emit_dec_memoffset (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), - compiler->exec_reg); - orc_x86_emit_jne (compiler, 4); + for(l=save_loop_shift - 1; l >= 0; l--) { + compiler->loop_shift = l; + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - orc_x86_emit_label (compiler, 5); + orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); + orc_x86_emit_je (compiler, 8 + compiler->loop_shift); + orc_sse_emit_loop (compiler); + orc_x86_emit_label (compiler, 8 + compiler->loop_shift); + } compiler->loop_shift = save_loop_shift; } diff --git a/orc/orcx86.c b/orc/orcx86.c index 5c13b86..f0c3de6 100644 --- a/orc/orcx86.c +++ b/orc/orcx86.c @@ -596,6 +596,34 @@ orc_x86_emit_cmp_imm_memoffset (OrcCompiler *compiler, int size, int value, } void +orc_x86_emit_test_imm_memoffset (OrcCompiler *compiler, int size, int value, + int offset, int reg) +{ + if (size == 2) { + ORC_ASM_CODE(compiler," testw $%d, %d(%%%s)\n", value, offset, + orc_x86_get_regname_ptr(compiler, reg)); + *compiler->codeptr++ = 0x66; + } else if (size == 4) { + ORC_ASM_CODE(compiler," testl $%d, %d(%%%s)\n", value, offset, + orc_x86_get_regname_ptr(compiler, reg)); + } else { + ORC_ASM_CODE(compiler," test $%d, %d(%%%s)\n", value, offset, + orc_x86_get_regname_ptr(compiler, reg)); + } + + orc_x86_emit_rex(compiler, size, 0, 0, reg); + + *compiler->codeptr++ = 0xf7; + 
orc_x86_emit_modrm_memoffset (compiler, 0, offset, reg); + *compiler->codeptr++ = (value & 0xff); + *compiler->codeptr++ = ((value>>8) & 0xff); + if (size == 4) { + *compiler->codeptr++ = ((value>>16) & 0xff); + *compiler->codeptr++ = ((value>>24) & 0xff); + } +} + +void orc_x86_emit_dec_memoffset (OrcCompiler *compiler, int size, int offset, int reg) { diff --git a/orc/orcx86.h b/orc/orcx86.h index 1e70d62..ea01fe5 100644 --- a/orc/orcx86.h +++ b/orc/orcx86.h @@ -50,6 +50,8 @@ void orc_x86_emit_cmp_reg_memoffset (OrcCompiler *compiler, int size, int reg1, int offset, int reg); void orc_x86_emit_cmp_imm_memoffset (OrcCompiler *compiler, int size, int value, int offset, int reg); +void orc_x86_emit_test_imm_memoffset (OrcCompiler *compiler, int size, int value, + int offset, int reg); void orc_x86_emit_emms (OrcCompiler *compiler); void orc_x86_emit_ret (OrcCompiler *compiler); void orc_x86_emit_jle (OrcCompiler *compiler, int label); -- 2.7.4