/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
unsigned int vp8_sad8x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint8x8_t d0, d8;
    uint16x8_t q12;
    uint32x4_t q1;
    uint64x2_t q3;
    uint32x2_t d5;
    int i;

    /* First row: widening absolute difference of 8 source/reference bytes. */
    d0 = vld1_u8(src_ptr);
    src_ptr += src_stride;
    d8 = vld1_u8(ref_ptr);
    ref_ptr += ref_stride;
    q12 = vabdl_u8(d0, d8);

    /* Remaining 7 rows: accumulate absolute differences into q12. */
    for (i = 0; i < 7; i++) {
        d0 = vld1_u8(src_ptr);
        src_ptr += src_stride;
        d8 = vld1_u8(ref_ptr);
        ref_ptr += ref_stride;
        q12 = vabal_u8(q12, d0, d8);
    }

    /* Reduce the eight 16-bit partial sums to a single 32-bit SAD. */
    q1 = vpaddlq_u16(q12);
    q3 = vpaddlq_u32(q1);
    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
                  vreinterpret_u32_u64(vget_high_u64(q3)));
    return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad8x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint8x8_t d0, d8;
    uint16x8_t q12;
    uint32x4_t q1;
    uint64x2_t q3;
    uint32x2_t d5;
    int i;

    /* First row: widening absolute difference of 8 source/reference bytes. */
    d0 = vld1_u8(src_ptr);
    src_ptr += src_stride;
    d8 = vld1_u8(ref_ptr);
    ref_ptr += ref_stride;
    q12 = vabdl_u8(d0, d8);

    /* Remaining 15 rows: accumulate absolute differences into q12. */
    for (i = 0; i < 15; i++) {
        d0 = vld1_u8(src_ptr);
        src_ptr += src_stride;
        d8 = vld1_u8(ref_ptr);
        ref_ptr += ref_stride;
        q12 = vabal_u8(q12, d0, d8);
    }

    /* Reduce the eight 16-bit partial sums to a single 32-bit SAD. */
    q1 = vpaddlq_u16(q12);
    q3 = vpaddlq_u32(q1);
    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
                  vreinterpret_u32_u64(vget_high_u64(q3)));
    return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad4x4_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint8x8_t d0, d8;
    uint16x8_t q12;
    uint32x2_t d1;
    uint64x1_t d3;
    int i;

    d0 = vld1_u8(src_ptr);
    src_ptr += src_stride;
    d8 = vld1_u8(ref_ptr);
    ref_ptr += ref_stride;
    q12 = vabdl_u8(d0, d8);

    for (i = 0; i < 3; i++) {
        d0 = vld1_u8(src_ptr);
        src_ptr += src_stride;
        d8 = vld1_u8(ref_ptr);
        ref_ptr += ref_stride;
        q12 = vabal_u8(q12, d0, d8);
    }

    /* Only the low four lanes hold data for the 4-pixel-wide block. */
    d1 = vpaddl_u16(vget_low_u16(q12));
    d3 = vpaddl_u32(d1);

    return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
}
unsigned int vp8_sad16x16_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint8x16_t q0, q4;
    uint16x8_t q12, q13;
    uint32x4_t q1;
    uint64x2_t q3;
    uint32x2_t d5;
    int i;

    /* First row: widening absolute differences for both 8-byte halves. */
    q0 = vld1q_u8(src_ptr);
    src_ptr += src_stride;
    q4 = vld1q_u8(ref_ptr);
    ref_ptr += ref_stride;
    q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
    q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));

    /* Remaining 15 rows: accumulate into the two running sums. */
    for (i = 0; i < 15; i++) {
        q0 = vld1q_u8(src_ptr);
        src_ptr += src_stride;
        q4 = vld1q_u8(ref_ptr);
        ref_ptr += ref_stride;
        q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
        q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
    }

    /* Combine the two halves and reduce to a single 32-bit SAD. */
    q12 = vaddq_u16(q12, q13);
    q1 = vpaddlq_u16(q12);
    q3 = vpaddlq_u32(q1);
    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
                  vreinterpret_u32_u64(vget_high_u64(q3)));

    return vget_lane_u32(d5, 0);
}
unsigned int vp8_sad16x8_neon(
        unsigned char *src_ptr,
        int src_stride,
        unsigned char *ref_ptr,
        int ref_stride) {
    uint8x16_t q0, q4;
    uint16x8_t q12, q13;
    uint32x4_t q1;
    uint64x2_t q3;
    uint32x2_t d5;
    int i;

    /* First row: widening absolute differences for both 8-byte halves. */
    q0 = vld1q_u8(src_ptr);
    src_ptr += src_stride;
    q4 = vld1q_u8(ref_ptr);
    ref_ptr += ref_stride;
    q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
    q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));

    /* Remaining 7 rows: accumulate into the two running sums. */
    for (i = 0; i < 7; i++) {
        q0 = vld1q_u8(src_ptr);
        src_ptr += src_stride;
        q4 = vld1q_u8(ref_ptr);
        ref_ptr += ref_stride;
        q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
        q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
    }

    /* Combine the two halves and reduce to a single 32-bit SAD. */
    q12 = vaddq_u16(q12, q13);
    q1 = vpaddlq_u16(q12);
    q3 = vpaddlq_u32(q1);
    d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
                  vreinterpret_u32_u64(vget_high_u64(q3)));

    return vget_lane_u32(d5, 0);
}
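
/* Note: each function above computes the sum of absolute differences (SAD)
 * between a source block and a reference block of the size named in the
 * function.  A minimal scalar sketch of the same computation (illustrative
 * only; the generic width/height parameters are assumptions, not part of
 * this file) would be:
 *
 *   unsigned int sad(const unsigned char *src, int src_stride,
 *                    const unsigned char *ref, int ref_stride,
 *                    int width, int height) {
 *     unsigned int total = 0;
 *     int r, c;
 *     for (r = 0; r < height; ++r) {
 *       for (c = 0; c < width; ++c)
 *         total += src[c] > ref[c] ? src[c] - ref[c] : ref[c] - src[c];
 *       src += src_stride;
 *       ref += ref_stride;
 *     }
 *     return total;
 *   }
 */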