2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "vp8/encoder/block.h"
/*
 * vp8_subtract_b_neon: NEON subtraction of a predictor from source pixels
 * for one block described by `be` / `bd`, writing 16-bit differences to
 * be->src_diff.
 *
 * Visible behaviour: four 8-byte source rows and four 8-byte predictor
 * vectors are loaded, subtracted with widening to 16 bits, and only the low
 * four lanes of each result are stored (a 4-wide, 4-row result).
 *
 * NOTE(review): the numbering embedded in these lines skips values
 * (14 -> 18, 36 -> 38, 50 -> 52), so this listing appears to omit lines:
 * the parameter list, the declarations of src_stride / src_diff, and any
 * pointer advances between the predictor loads and between the stores are
 * not shown here. Confirm against the complete file.
 */
14 void vp8_subtract_b_neon(
18 unsigned char *src_ptr, *predictor;
21 uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
22 uint16x8_t q10u16, q11u16, q12u16, q13u16;
/* Locate the source pixels (base pointer plus offset), the source row
 * stride, and the predictor buffer from the two block descriptors. */
24 src_ptr = *be->base_src + be->src;
25 src_stride = be->src_stride;
26 predictor = bd->predictor;
/* Load four source rows of 8 bytes each, stepping by src_stride. */
28 d0u8 = vld1_u8(src_ptr);
29 src_ptr += src_stride;
30 d2u8 = vld1_u8(src_ptr);
31 src_ptr += src_stride;
32 d4u8 = vld1_u8(src_ptr);
33 src_ptr += src_stride;
34 d6u8 = vld1_u8(src_ptr);
/* Load four 8-byte predictor vectors.
 * NOTE(review): no advance of `predictor` between these loads is visible
 * in this listing -- presumably it happens on the omitted lines; verify. */
36 d1u8 = vld1_u8(predictor);
38 d3u8 = vld1_u8(predictor);
40 d5u8 = vld1_u8(predictor);
42 d7u8 = vld1_u8(predictor);
/* Widening subtract: source minus predictor, 8-bit lanes -> 16-bit lanes. */
44 q10u16 = vsubl_u8(d0u8, d1u8);
45 q11u16 = vsubl_u8(d2u8, d3u8);
46 q12u16 = vsubl_u8(d4u8, d5u8);
47 q13u16 = vsubl_u8(d6u8, d7u8);
/* Store only the low four 16-bit lanes of each row to the difference
 * buffer; the upper four lanes computed above are discarded.
 * NOTE(review): no advance of `src_diff` between these stores is visible
 * in this listing -- presumably it happens on the omitted lines; verify. */
49 src_diff = be->src_diff;
50 vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16));
52 vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16));
54 vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16));
56 vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16));
/*
 * vp8_subtract_mby_neon: NEON subtraction using 16-byte-wide vectors,
 * writing 16-bit differences through `diff`.
 *
 * Visible behaviour: the loop runs 8 times; each pass subtracts two pairs
 * of 16-byte vectors (q0u8 - q1u8 and q2u8 - q3u8) with widening to 16 bits
 * and issues four stores of eight 16-bit values each.
 *
 * NOTE(review): the numbering embedded in these lines skips values
 * (60 -> 67, 70 -> 75, 85 -> 87), so this listing appears to omit lines:
 * the parameter list, the loads of q0u8 / q2u8, and any advances of the
 * source, `pred` and `diff` pointers are not shown here. Confirm against
 * the complete file.
 */
60 void vp8_subtract_mby_neon(
67 uint8x16_t q0u8, q1u8, q2u8, q3u8;
68 uint16x8_t q8u16, q9u16, q10u16, q11u16;
70 for (i = 0; i < 8; i++) { // subtract_mby_loop
/* Load two 16-byte predictor vectors.
 * NOTE(review): the matching loads into q0u8 / q2u8 and the pointer
 * advance between these two loads are not visible here; verify. */
75 q1u8 = vld1q_u8(pred);
77 q3u8 = vld1q_u8(pred);
/* Widening subtract of each 16-byte pair, done as separate low and high
 * 8-byte halves because each result holds eight 16-bit lanes. */
80 q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8));
81 q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8));
82 q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8));
83 q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8));
/* Store the four result vectors (eight 16-bit values each) to `diff`.
 * NOTE(review): no advance of `diff` between these stores is visible in
 * this listing -- presumably it happens on the omitted lines; verify. */
85 vst1q_u16((uint16_t *)diff, q8u16);
87 vst1q_u16((uint16_t *)diff, q9u16);
89 vst1q_u16((uint16_t *)diff, q10u16);
91 vst1q_u16((uint16_t *)diff, q11u16);
/*
 * vp8_subtract_mbuv_neon: NEON subtraction of predictor rows from source
 * rows, 8 bytes per row, writing 16-bit differences through `diff`.
 *
 * Visible behaviour: a 2-pass outer loop contains a 2-pass inner loop; each
 * inner pass loads four 8-byte source rows and four 8-byte predictor rows
 * (stepping by src_stride and pred_stride respectively), subtracts them
 * with widening to 16 bits, and issues four stores of eight 16-bit values.
 *
 * NOTE(review): the numbering embedded in these lines skips values
 * (97 -> 102, 111 -> 120, 143 -> 145), so this listing appears to omit
 * lines: most of the parameter list, the setup of src_ptr / pred_ptr at the
 * top of the outer loop, and any advance of `diff` are not shown here.
 * Presumably the outer loop selects the U and then the V plane (cf. the
 * upred / vpred parameters) -- confirm against the complete file.
 */
97 void vp8_subtract_mbuv_neon(
102 unsigned char *upred,
103 unsigned char *vpred,
106 unsigned char *src_ptr, *pred_ptr;
107 uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
108 uint16x8_t q8u16, q9u16, q10u16, q11u16;
111 for (i = 0; i < 2; i++) {
/* Each inner pass handles four rows of eight pixels. */
120 for (j = 0; j < 2; j++) {
/* Interleaved loads: even-numbered registers take source rows,
 * odd-numbered registers take the corresponding predictor rows. */
121 d0u8 = vld1_u8(src_ptr);
122 src_ptr += src_stride;
123 d1u8 = vld1_u8(pred_ptr);
124 pred_ptr += pred_stride;
125 d2u8 = vld1_u8(src_ptr);
126 src_ptr += src_stride;
127 d3u8 = vld1_u8(pred_ptr);
128 pred_ptr += pred_stride;
129 d4u8 = vld1_u8(src_ptr);
130 src_ptr += src_stride;
131 d5u8 = vld1_u8(pred_ptr);
132 pred_ptr += pred_stride;
133 d6u8 = vld1_u8(src_ptr);
134 src_ptr += src_stride;
135 d7u8 = vld1_u8(pred_ptr);
136 pred_ptr += pred_stride;
/* Widening subtract: source minus predictor, 8-bit lanes -> 16-bit lanes. */
138 q8u16 = vsubl_u8(d0u8, d1u8);
139 q9u16 = vsubl_u8(d2u8, d3u8);
140 q10u16 = vsubl_u8(d4u8, d5u8);
141 q11u16 = vsubl_u8(d6u8, d7u8);
/* Store the four result rows (eight 16-bit values each) to `diff`.
 * NOTE(review): no advance of `diff` between these stores is visible in
 * this listing -- presumably it happens on the omitted lines; verify. */
143 vst1q_u16((uint16_t *)diff, q8u16);
145 vst1q_u16((uint16_t *)diff, q9u16);
147 vst1q_u16((uint16_t *)diff, q10u16);
149 vst1q_u16((uint16_t *)diff, q11u16);