From c1027afe40865a9d276586a7066debe69549192b Mon Sep 17 00:00:00 2001
From: David Schleef
Date: Mon, 7 Jun 2010 11:41:35 -0700
Subject: [PATCH] x86: Add alternate rep movs memcpy implementation

---
/*
 * Review notes. This text sits in the free-form area before the first
 * diff header and is not part of the change itself.
 *
 * orc_x86_emit_rep_movs (compiler, size)
 *   Appends the machine code for one rep movs instruction through
 *   compiler->codeptr and logs the mnemonic with ORC_ASM_CODE:
 *     size 1: f3 a4     (rep movsb)
 *     size 2: 66 f3 a5  (rep movsw)
 *     size 4: f3 a5     (rep movsl)
 *   The switch has no default case, so any other size emits nothing.
 *
 * orc_x86_assemble_copy_check (compiler)
 *   Returns TRUE only when the program has exactly one instruction, is
 *   not 2D, and that instruction is named copyb, copyw or copyl.
 *   Returns FALSE otherwise.
 *
 * orc_x86_assemble_copy (compiler)
 *   Emits a whole function body for such a program: prologue, loads of
 *   the destination array, source array and n into EDI, ESI and ECX, a
 *   right shift of ECX by (2 - shift), rep movsl, then for copyb or
 *   copyw a reload of n masked with 3 or 1 followed by rep movsb or
 *   rep movsw, and finally epilogue and fixups.
 *
 * The call site in orc_compiler_sse_assemble is guarded by a constant 0,
 * so the new path is compiled in but never taken.
 *
 * NOTE(review): the arrays[] entries are loaded with operand size 4.
 *   Presumably they are pointers, which would make this path 32-bit
 *   only. Confirm before enabling it.
 * NOTE(review): the count is scaled with sar, presumably an arithmetic
 *   shift, so a non-negative n looks like an assumption. Confirm.
 */
 orc/orcprogram-sse.c |  6 ++++
 orc/orcx86.c         | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 orc/orcx86.h         |  4 +++
 3 files changed, 99 insertions(+)

diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c
index e58043a..3490921 100644
--- a/orc/orcprogram-sse.c
+++ b/orc/orcprogram-sse.c
@@ -552,6 +552,12 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
 {
   int align_var;
 
+  if (0 && orc_x86_assemble_copy_check (compiler)) { /* the constant 0 keeps this branch disabled */
+    /* The rep movs implementation isn't faster most of the time */
+    orc_x86_assemble_copy (compiler);
+    return;
+  }
+
   align_var = get_align_var (compiler);
 
   compiler->vars[align_var].is_aligned = FALSE;
diff --git a/orc/orcx86.c b/orc/orcx86.c
index c410a72..7ca3dfc 100644
--- a/orc/orcx86.c
+++ b/orc/orcx86.c
@@ -698,6 +698,28 @@ void orc_x86_emit_emms (OrcCompiler *compiler)
   *compiler->codeptr++ = 0x77;
 }
 
+void orc_x86_emit_rep_movs (OrcCompiler *compiler, int size) /* size: 1, 2 or 4; other values emit nothing */
+{
+  switch (size) {
+    case 1:
+      ORC_ASM_CODE(compiler," rep movsb\n");
+      *compiler->codeptr++ = 0xf3; /* rep prefix */
+      *compiler->codeptr++ = 0xa4; /* movsb opcode */
+      break;
+    case 2:
+      ORC_ASM_CODE(compiler," rep movsw\n");
+      *compiler->codeptr++ = 0x66; /* operand-size prefix, selects the 16-bit form */
+      *compiler->codeptr++ = 0xf3; /* rep prefix */
+      *compiler->codeptr++ = 0xa5; /* movsw/movsl opcode */
+      break;
+    case 4:
+      ORC_ASM_CODE(compiler," rep movsl\n");
+      *compiler->codeptr++ = 0xf3; /* rep prefix */
+      *compiler->codeptr++ = 0xa5; /* movsw/movsl opcode */
+      break;
+  }
+}
+
 void
 x86_add_fixup (OrcCompiler *compiler, unsigned char *ptr, int label, int type)
 {
@@ -898,3 +920,70 @@ orc_x86_emit_align (OrcCompiler *compiler)
   }
 }
 
+/* memcpy implementation based on rep movs */
+
+int
+orc_x86_assemble_copy_check (OrcCompiler *compiler)
+{
+  if (compiler->program->n_insns == 1 && /* exactly one instruction */
+      compiler->program->is_2d == FALSE && /* 2D programs are excluded */
+      (strcmp (compiler->program->insns[0].opcode->name, "copyb") == 0 ||
+       strcmp (compiler->program->insns[0].opcode->name, "copyw") == 0 ||
+       strcmp (compiler->program->insns[0].opcode->name, "copyl") == 0)) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+void
+orc_x86_assemble_copy (OrcCompiler *compiler)
+{
+  OrcInstruction *insn;
+  int shift = 0; /* 0 for copyb, 1 for copyw, 2 for copyl */
+
+  insn = compiler->program->insns + 0;
+
+  if (strcmp (insn->opcode->name, "copyw") == 0) {
+    shift = 1;
+  } else if (strcmp (insn->opcode->name, "copyl") == 0) {
+    shift = 2;
+  }
+
+  compiler->used_regs[X86_EDI] = TRUE; /* marked before the prologue is emitted */
+  compiler->used_regs[X86_ESI] = TRUE;
+
+  orc_x86_emit_prologue (compiler);
+
+  orc_x86_emit_mov_memoffset_reg (compiler, 4, /* EDI = destination array; NOTE(review): 4-byte load, confirm 32-bit only */
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[insn->dest_args[0]]),
+      compiler->exec_reg, X86_EDI);
+  orc_x86_emit_mov_memoffset_reg (compiler, 4, /* ESI = source array */
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,arrays[insn->src_args[0]]),
+      compiler->exec_reg, X86_ESI);
+  orc_x86_emit_mov_memoffset_reg (compiler, 4, /* ECX = n */
+      (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg,
+      X86_ECX);
+
+  orc_x86_emit_sar_imm_reg (compiler, 4, 2 - shift, X86_ECX); /* scale n to 4-byte units; presumably n counts elements */
+  orc_x86_emit_rep_movs (compiler, 4); /* bulk copy, 4 bytes per iteration */
+  if (shift == 0) {
+    orc_x86_emit_mov_memoffset_reg (compiler, 4, /* reload n for the tail */
+        (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg,
+        X86_ECX);
+    orc_x86_emit_and_imm_reg (compiler, 4, 3, X86_ECX); /* ECX = n & 3, the leftover bytes */
+    orc_x86_emit_rep_movs (compiler, 1);
+  }
+  if (shift == 1) {
+    orc_x86_emit_mov_memoffset_reg (compiler, 4, /* reload n for the tail */
+        (int)ORC_STRUCT_OFFSET(OrcExecutor,n), compiler->exec_reg,
+        X86_ECX);
+    orc_x86_emit_and_imm_reg (compiler, 4, 1, X86_ECX); /* ECX = n & 1, the leftover 16-bit element */
+    orc_x86_emit_rep_movs (compiler, 2);
+  }
+
+  orc_x86_emit_epilogue (compiler);
+
+  orc_x86_do_fixups (compiler);
+}
+
diff --git a/orc/orcx86.h b/orc/orcx86.h
index 0c2f763..82baee3 100644
--- a/orc/orcx86.h
+++ b/orc/orcx86.h
@@ -56,6 +56,7 @@ void orc_x86_emit_cmp_imm_memoffset (OrcCompiler *compiler, int size, int value,
 void orc_x86_emit_test_imm_memoffset (OrcCompiler *compiler, int size, int value,
     int offset, int reg);
 void orc_x86_emit_emms (OrcCompiler *compiler);
+void orc_x86_emit_rep_movs (OrcCompiler *compiler, int size); /* size is 1, 2 or 4 */
 void orc_x86_emit_ret (OrcCompiler *compiler);
 void orc_x86_emit_jle (OrcCompiler *compiler, int label);
 void orc_x86_emit_je (OrcCompiler *compiler, int label);
@@ -72,6 +73,9 @@ void orc_x86_emit_modrm_reg (OrcCompiler *compiler, int reg1, int reg2);
 
 void orc_x86_do_fixups (OrcCompiler *compiler);
 
+int orc_x86_assemble_copy_check (OrcCompiler *compiler); /* TRUE for a lone copyb/copyw/copyl, non-2D program */
+void orc_x86_assemble_copy (OrcCompiler *compiler); /* emits the rep movs based copy routine */
+
 #endif
 
-- 
2.7.4