Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / third_party / libvpx / source / libvpx / vp8 / common / arm / neon / reconintra_neon.c
1 /*
2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <arm_neon.h>
12
13 #include "vp8/common/blockd.h"
14
/*
 * Builds the 16x16 luma intra prediction selected by
 * x->mode_info_context->mbmi.mode and stores it at ypred_ptr.
 *
 *   x            macroblock descriptor; supplies the prediction mode and the
 *                up_available / left_available flags used by DC_PRED.
 *   yabove_row   16 bytes used as the row above the block. TM_PRED also reads
 *                yabove_row[-1] as the top-left pixel.
 *   yleft        first byte of the left column; the remaining 15 entries are
 *                read at successive multiples of left_stride.
 *   left_stride  byte distance between consecutive left-column pixels.
 *   ypred_ptr    destination; 16 rows of 16 bytes are written.
 *   y_stride     byte distance between consecutive destination rows.
 *
 * Any mode other than DC_PRED, V_PRED, H_PRED or TM_PRED writes nothing.
 */
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
                                           unsigned char * yabove_row,
                                           unsigned char * yleft,
                                           int left_stride,
                                           unsigned char * ypred_ptr,
                                           int y_stride) {
  const int mode = x->mode_info_context->mbmi.mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      int shift = x->up_available + x->left_available;
      /* Used unchanged when neither neighbouring edge is available. */
      uint8x16_t v_expected_dc = vdupq_n_u8(128);

      if (shift) {
        unsigned int sum = 0;
        if (x->up_available) {
          /* Widening pairwise adds reduce the 16 bytes to two 64-bit partial
           * sums; adding their low 32-bit words yields the row total. */
          const uint8x16_t v_above = vld1q_u8(yabove_row);
          const uint16x8_t a = vpaddlq_u8(v_above);
          const uint32x4_t b = vpaddlq_u16(a);
          const uint64x2_t c = vpaddlq_u32(b);
          const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
                                        vreinterpret_u32_u64(vget_high_u64(c)));
          sum = vget_lane_u32(d, 0);
        }
        if (x->left_available) {
          for (i = 0; i < 16; ++i) {
            sum += yleft[0];
            yleft += left_stride;
          }
        }
        /* 16 samples per edge: with flags of 0 or 1 this divides by 16 (one
         * edge) or 32 (both edges), rounding to nearest. */
        shift += 3;
        v_expected_dc =
            vdupq_n_u8((uint8_t)((sum + (1u << (shift - 1))) >> shift));
      }
      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_expected_dc);
        ypred_ptr += y_stride;
      }
    }
    break;
    case V_PRED:
    {
      /* Every output row is a copy of the row above. */
      const uint8x16_t v_above = vld1q_u8(yabove_row);
      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_above);
        ypred_ptr += y_stride;
      }
    }
    break;
    case H_PRED:
    {
      /* Every output row is filled with its left neighbour. */
      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, vdupq_n_u8(yleft[0]));
        yleft += left_stride;
        ypred_ptr += y_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* pred = clamp(above + left - top_left, 0, 255).
       * (above - top_left) is the same for every row, so it is computed once.
       * vsubl_u8 wraps modulo 2^16; reinterpreted as signed that is the true
       * difference in [-255, 255]. Adding left (0..255) stays within int16
       * range, so the result matches (above + left) - top_left exactly. */
      const uint8x16_t v_above = vld1q_u8(yabove_row);
      const uint8x8_t v_top_left = vdup_n_u8(yabove_row[-1]);
      const int16x8_t v_diff_lo =
          vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(v_above), v_top_left));
      const int16x8_t v_diff_hi =
          vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(v_above), v_top_left));
      for (i = 0; i < 16; ++i) {
        /* Pixel value 0..255 always fits in int16_t. */
        const int16x8_t v_left = vdupq_n_s16((int16_t)yleft[0]);
        /* vqmovun_s16 saturates each signed lane to 0..255. */
        const uint8x8_t pred_lo = vqmovun_s16(vaddq_s16(v_diff_lo, v_left));
        const uint8x8_t pred_hi = vqmovun_s16(vaddq_s16(v_diff_hi, v_left));

        vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi));
        ypred_ptr += y_stride;
        yleft += left_stride;
      }
    }
    break;
    default:
      /* No prediction is produced here for any other mode. */
      break;
  }
}
100
/*
 * Builds the two 8x8 chroma intra predictions (U and V) selected by
 * x->mode_info_context->mbmi.uv_mode and stores them at upred_ptr / vpred_ptr.
 *
 *   x             macroblock descriptor; supplies the prediction mode and the
 *                 up_available / left_available flags used by DC_PRED.
 *   uabove_row,
 *   vabove_row    8 bytes each, used as the row above the U / V block.
 *                 TM_PRED also reads index [-1] of each as the top-left pixel.
 *   uleft, vleft  first byte of each left column; the remaining 7 entries are
 *                 read at successive multiples of left_stride.
 *   left_stride   byte distance between consecutive left-column pixels.
 *   upred_ptr,
 *   vpred_ptr     destinations; 8 rows of 8 bytes are written to each.
 *   pred_stride   byte distance between consecutive destination rows.
 *
 * Any mode other than DC_PRED, V_PRED, H_PRED or TM_PRED writes nothing.
 */
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride) {
  const int mode = x->mode_info_context->mbmi.uv_mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      int shift = x->up_available + x->left_available;
      /* Used unchanged when neither neighbouring edge is available. */
      uint8x8_t v_expected_udc = vdup_n_u8(128);
      uint8x8_t v_expected_vdc = vdup_n_u8(128);

      if (shift) {
        unsigned int sum_u = 0;
        unsigned int sum_v = 0;
        if (x->up_available) {
          /* U occupies the low half and V the high half of one q register,
           * so a single chain of widening pairwise adds sums both rows.
           * Viewed as four 32-bit lanes, lane 0 holds the low word of the
           * U total and lane 2 the low word of the V total. */
          const uint8x8_t v_uabove = vld1_u8(uabove_row);
          const uint8x8_t v_vabove = vld1_u8(vabove_row);
          const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove));
          const uint32x4_t b = vpaddlq_u16(a);
          const uint32x4_t c = vreinterpretq_u32_u64(vpaddlq_u32(b));
          sum_u = vgetq_lane_u32(c, 0);
          sum_v = vgetq_lane_u32(c, 2);
        }
        if (x->left_available) {
          for (i = 0; i < 8; ++i) {
            sum_u += uleft[0];
            uleft += left_stride;
            sum_v += vleft[0];
            vleft += left_stride;
          }
        }
        /* 8 samples per edge: with flags of 0 or 1 this divides by 8 (one
         * edge) or 16 (both edges), rounding to nearest. */
        shift += 2;
        v_expected_udc =
            vdup_n_u8((uint8_t)((sum_u + (1u << (shift - 1))) >> shift));
        v_expected_vdc =
            vdup_n_u8((uint8_t)((sum_v + (1u << (shift - 1))) >> shift));
      }
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_expected_udc);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_expected_vdc);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case V_PRED:
    {
      /* Every output row is a copy of the row above. */
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_uabove);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vabove);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case H_PRED:
    {
      /* Every output row is filled with its left neighbour. */
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, vdup_n_u8(uleft[0]));
        upred_ptr += pred_stride;
        uleft += left_stride;
        vst1_u8(vpred_ptr, vdup_n_u8(vleft[0]));
        vpred_ptr += pred_stride;
        vleft += left_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* pred = clamp(above + left - top_left, 0, 255).
       * (above - top_left) is the same for every row, so it is computed once.
       * vsubl_u8 wraps modulo 2^16; reinterpreted as signed that is the true
       * difference in [-255, 255]. Adding left (0..255) stays within int16
       * range, so the result matches (above + left) - top_left exactly. */
      const int16x8_t v_udiff = vreinterpretq_s16_u16(
          vsubl_u8(vld1_u8(uabove_row), vdup_n_u8(uabove_row[-1])));
      const int16x8_t v_vdiff = vreinterpretq_s16_u16(
          vsubl_u8(vld1_u8(vabove_row), vdup_n_u8(vabove_row[-1])));
      for (i = 0; i < 8; ++i) {
        /* Pixel values 0..255 always fit in int16_t. */
        const int16x8_t v_uleft = vdupq_n_s16((int16_t)uleft[0]);
        const int16x8_t v_vleft = vdupq_n_s16((int16_t)vleft[0]);
        /* vqmovun_s16 saturates each signed lane to 0..255. */
        const uint8x8_t pred_u = vqmovun_s16(vaddq_s16(v_udiff, v_uleft));
        const uint8x8_t pred_v = vqmovun_s16(vaddq_s16(v_vdiff, v_vleft));

        vst1_u8(upred_ptr, pred_u);
        vst1_u8(vpred_ptr, pred_v);
        upred_ptr += pred_stride;
        vpred_ptr += pred_stride;
        uleft += left_stride;
        vleft += left_stride;
      }
    }
    break;
    default:
      /* No prediction is produced here for any other mode. */
      break;
  }
}