2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_mse16x16_neon|
13 EXPORT |vp8_get4x4sse_cs_neon|
19 AREA ||.text||, CODE, READONLY, ALIGN=2
20 ;============================
21 ; r0 unsigned char *src_ptr
22 ; r1 int source_stride
23 ; r2 unsigned char *ref_ptr
25 ; stack unsigned int *sse
26 ;note: in this function, sum is never used, so that part of the calculation can be removed.
; vp8_mse16x16_neon
;   Accumulates squared signed 16-bit values into four 32-bit-lane
;   accumulators (q7-q10) and writes one 32-bit result through the sse
;   pointer taken from the stack.
;   NOTE(review): the loop label, the loop-counter update, the instructions
;   that fill d22-d29, and the reduction of q7-q10 into d0 are not part of
;   the lines shown here - confirm against the full routine.
29 |vp8_mse16x16_neon| PROC
; Clear accumulator q7. q8-q10 are presumably cleared the same way (not shown).
32 vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse
; Load 16 bytes from [r0] (src_ptr), then post-increment r0 by r1 (source_stride).
40 vld1.8 {q0}, [r0], r1 ;Load up source and reference
; Each vmlal.s16 below uses the same D register for both multiplicands, so it
; adds the square of each of the four signed 16-bit lanes to the matching
; 32-bit lane of the accumulator.
; d22/d23 are the low/high halves of q11, d24/d25 of q12, d26/d27 of q13 and
; d28/d29 of q14. Presumably these hold the widened source-minus-reference
; differences - TODO confirm; the instructions that write them are not shown.
50 vmlal.s16 q7, d22, d22
51 vmlal.s16 q8, d23, d23
55 vmlal.s16 q9, d24, d24
56 vmlal.s16 q10, d25, d25
; The second group of four reuses the accumulators q7-q10 in the same order.
57 vmlal.s16 q7, d26, d26
58 vmlal.s16 q8, d27, d27
59 vmlal.s16 q9, d28, d28
60 vmlal.s16 q10, d29, d29
; Repeat while the Z flag is clear. vmlal does not modify the condition flags,
; so they come from an instruction not shown here (presumably a loop-counter
; decrement - TODO confirm).
62 bne mse16x16_neon_loop
; Fetch the sse output pointer from the stack.
; NOTE(review): the offset is 16 rather than 0, presumably to skip registers
; saved on entry - verify against the prologue, which is not shown.
67 ldr r12, [sp, #16] ;load *sse from stack
; Store 32-bit lane 0 of d0 to *sse. How q7-q10 are folded into d0 is not shown.
73 vst1.32 {d0[0]}, [r12]
82 ;=============================
83 ; r0 unsigned char *src_ptr,
84 ; r1 int source_stride,
85 ; r2 unsigned char *ref_ptr,
87 |vp8_get4x4sse_cs_neon| PROC
90 vld1.8 {d0}, [r0], r1 ;Load up source and reference
104 vmull.s16 q7, d22, d22
105 vmull.s16 q8, d24, d24
106 vmull.s16 q9, d26, d26
107 vmull.s16 q10, d28, d28