Wrapper function removed from vp8_subtract_b_neon function call
author Tero Rintaluoma <teror@google.com>
Wed, 30 Mar 2011 10:45:59 +0000 (13:45 +0300)
committer Tero Rintaluoma <teror@google.com>
Fri, 1 Apr 2011 07:06:44 +0000 (10:06 +0300)
Address calculations moved from the encodemb_arm.c file into the
NEON-optimized assembly function to save the cycles spent on an extra
function call.
 - vp8_subtract_b_neon_func replaced with vp8_subtract_b_neon
   that contains all needed address calculations
 - unnecessary file encodemb_arm.c removed
 - consistent with ARMv6 optimized version

Change-Id: I6cbc1a2670b56c2077f59995fcf8f70786b4990b

vp8/encoder/arm/encodemb_arm.c [deleted file]
vp8/encoder/arm/neon/subtract_neon.asm
vp8/encoder/asm_enc_offsets.c
vp8/vp8cx_arm.mk

diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
deleted file mode 100644
index 88ad3fc..0000000
--- a/vp8/encoder/arm/encodemb_arm.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vp8/encoder/encodemb.h"
-#include "vp8/common/reconinter.h"
-#include "vp8/encoder/quantize.h"
-#include "vp8/common/invtrans.h"
-#include "vp8/common/recon.h"
-#include "vp8/common/reconintra.h"
-#include "vp8/encoder/dct.h"
-#include "vpx_mem/vpx_mem.h"
-
-extern void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch);
-
-void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
-{
-    unsigned char *src_ptr = (*(be->base_src) + be->src);
-    short *diff_ptr = be->src_diff;
-    unsigned char *pred_ptr = bd->predictor;
-    int src_stride = be->src_stride;
-
-    vp8_subtract_b_neon_func(diff_ptr, src_ptr, pred_ptr, src_stride, pitch);
-}
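diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 3ea00f8..68c2950 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm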
@@ -8,45 +8,58 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-
-    EXPORT |vp8_subtract_b_neon_func|
+    EXPORT |vp8_subtract_b_neon|
     EXPORT |vp8_subtract_mby_neon|
     EXPORT |vp8_subtract_mbuv_neon|
 
+    INCLUDE asm_enc_offsets.asm
+
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
-;=========================================
-;void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch);
-|vp8_subtract_b_neon_func| PROC
-    ldr             r12, [sp]               ;load pitch
-
-    vld1.8          {d0}, [r1], r3          ;load src
-    vld1.8          {d1}, [r2], r12         ;load pred
-    vld1.8          {d2}, [r1], r3
-    vld1.8          {d3}, [r2], r12
-    vld1.8          {d4}, [r1], r3
-    vld1.8          {d5}, [r2], r12
-    vld1.8          {d6}, [r1], r3
-    vld1.8          {d7}, [r2], r12
+
+;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
+|vp8_subtract_b_neon| PROC
+
+    stmfd   sp!, {r4-r7}
+
+    ldr     r3, [r0, #vp8_block_base_src]
+    ldr     r4, [r0, #vp8_block_src]
+    ldr     r5, [r0, #vp8_block_src_diff]
+    ldr     r3, [r3]
+    ldr     r6, [r0, #vp8_block_src_stride]
+    add     r3, r3, r4                      ; src = *base_src + src
+    ldr     r7, [r1, #vp8_blockd_predictor]
+
+    vld1.8          {d0}, [r3], r6          ;load src
+    vld1.8          {d1}, [r7], r2          ;load pred
+    vld1.8          {d2}, [r3], r6
+    vld1.8          {d3}, [r7], r2
+    vld1.8          {d4}, [r3], r6
+    vld1.8          {d5}, [r7], r2
+    vld1.8          {d6}, [r3], r6
+    vld1.8          {d7}, [r7], r2
 
     vsubl.u8        q10, d0, d1
     vsubl.u8        q11, d2, d3
     vsubl.u8        q12, d4, d5
     vsubl.u8        q13, d6, d7
 
-    mov             r12, r12, lsl #1
+    mov             r2, r2, lsl #1
 
-    vst1.16         {d20}, [r0], r12        ;store diff
-    vst1.16         {d22}, [r0], r12
-    vst1.16         {d24}, [r0], r12
-    vst1.16         {d26}, [r0], r12
+    vst1.16         {d20}, [r5], r2         ;store diff
+    vst1.16         {d22}, [r5], r2
+    vst1.16         {d24}, [r5], r2
+    vst1.16         {d26}, [r5], r2
 
+    ldmfd   sp!, {r4-r7}
     bx              lr
+
     ENDP
 
+
 ;==========================================
 ;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
 |vp8_subtract_mby_neon| PROC
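diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/asm_enc_offsets.c
index c7983c1..9c81c8d 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c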
@@ -48,6 +48,14 @@ DEFINE(vp8_blockd_dequant,                      offsetof(BLOCKD, dequant));
 DEFINE(vp8_blockd_dqcoeff,                      offsetof(BLOCKD, dqcoeff));
 DEFINE(vp8_blockd_eob,                          offsetof(BLOCKD, eob));
 
+// subtract
+DEFINE(vp8_block_base_src,                      offsetof(BLOCK, base_src));
+DEFINE(vp8_block_src,                           offsetof(BLOCK, src));
+DEFINE(vp8_block_src_diff,                      offsetof(BLOCK, src_diff));
+DEFINE(vp8_block_src_stride,                    offsetof(BLOCK, src_stride));
+
+DEFINE(vp8_blockd_predictor,                    offsetof(BLOCKD, predictor));
+
 //pack tokens
 DEFINE(vp8_writer_lowvalue,                     offsetof(vp8_writer, lowvalue));
 DEFINE(vp8_writer_range,                        offsetof(vp8_writer, range));
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 349c3fd..fd1c773 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -15,7 +15,6 @@
 # encoder
 VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
 
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/dct_arm.c