From 65aaf91aefc54d9370538a48eef7f659495b2437 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Tue, 6 Jul 2010 12:31:47 -0700 Subject: [PATCH] neon: Use unaligned ops instead of mask register --- orc/orcprogram-neon.c | 14 +++++++++++--- orc/orcrules-neon.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c index f27b034..b2d0a95 100644 --- a/orc/orcprogram-neon.c +++ b/orc/orcprogram-neon.c @@ -220,7 +220,9 @@ orc_compiler_neon_init (OrcCompiler *compiler) compiler->loop_shift = loop_shift; } - compiler->need_mask_regs = TRUE; + if (0) { + compiler->need_mask_regs = TRUE; + } } void @@ -670,7 +672,11 @@ orc_compiler_neon_assemble (OrcCompiler *compiler) orc_arm_emit_cmp_imm (compiler, ORC_ARM_IP, 0); orc_arm_emit_branch (compiler, ORC_ARM_COND_EQ, 3); - orc_neon_load_alignment_masks (compiler); + if (0) { + /* Disable alignment masks for now. It can easily take all available + registers. */ + orc_neon_load_alignment_masks (compiler); + } orc_arm_emit_label (compiler, 2); orc_neon_emit_loop (compiler); @@ -678,7 +684,9 @@ orc_compiler_neon_assemble (OrcCompiler *compiler) orc_arm_emit_cmp_imm (compiler, ORC_ARM_IP, 0); orc_arm_emit_branch (compiler, ORC_ARM_COND_NE, 2); - orc_neon_restore_unalignment (compiler); + if (0) { + orc_neon_restore_unalignment (compiler); + } orc_arm_emit_label (compiler, 3); diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c index c0e24d9..2135f91 100644 --- a/orc/orcrules-neon.c +++ b/orc/orcrules-neon.c @@ -245,6 +245,18 @@ orc_neon_load_vec_unaligned (OrcCompiler *compiler, OrcVariable *var, { orc_uint32 code; + ORC_ASM_CODE(compiler," vld1.8 %s, [%s]%s\n", + orc_neon_reg_name (var->alloc), + orc_arm_reg_name (var->ptr_register), + update ? "!" : ""); + code = 0xf420070d; + code |= (var->ptr_register&0xf) << 16; + code |= ((var->alloc)&0xf) << 12; + code |= (((var->alloc)>>4)&0x1) << 22; + code |= (!update) << 1; + orc_arm_emit (compiler, code); +#if 0 + /* used with need_mask_regs */ ORC_ASM_CODE(compiler," vld1.64 %s, [%s]%s\n", orc_neon_reg_name (var->aligned_data + 1), orc_arm_reg_name (var->ptr_register), @@ -267,6 +279,7 @@ orc_neon_load_vec_unaligned (OrcCompiler *compiler, OrcVariable *var, //orc_neon_emit_mov (compiler, var->alloc, var->mask_alloc); orc_neon_emit_mov (compiler, var->aligned_data, var->aligned_data + 1); +#endif } void @@ -275,6 +288,23 @@ orc_neon_load_halfvec_unaligned (OrcCompiler *compiler, OrcVariable *var, { orc_uint32 code; + ORC_ASM_CODE(compiler," vld1.8 %s, [%s]\n", + orc_neon_reg_name (var->alloc), + orc_arm_reg_name (var->ptr_register)); + code = 0xf420070d; + code |= (var->ptr_register&0xf) << 16; + code |= ((var->alloc)&0xf) << 12; + code |= (((var->alloc)>>4)&0x1) << 22; + //code |= (!update) << 1; + code |= (1) << 1; + orc_arm_emit (compiler, code); + + if (update) { + orc_arm_emit_add_imm (compiler, var->ptr_register, + var->ptr_register, 4); + } +#if 0 + /* used with need_mask_regs */ ORC_ASM_CODE(compiler," vld1.32 %s[1], [%s]%s\n", orc_neon_reg_name (var->aligned_data), orc_arm_reg_name (var->ptr_register), @@ -296,6 +326,7 @@ orc_neon_load_halfvec_unaligned (OrcCompiler *compiler, OrcVariable *var, orc_neon_emit_unary (compiler, "vrev64.i32", 0xf3b80000, var->aligned_data, var->aligned_data); +#endif } void -- 2.7.4