Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / third_party / libvpx / source / libvpx / vp9 / common / x86 / vp9_asm_stubs.c
1 /*
2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include <assert.h>
12
13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h"
15 #include "vpx_ports/mem.h"
16
// Function type shared by every vp9_filter_block1d* kernel declared in this
// file. FUN_CONV_1D calls these kernels as
//   kernel(source pointer, src_stride, dst, dst_stride, h, filter)
// so the arguments are, in order: source pointer and pitch, destination
// pointer and pitch, the number of rows to produce, and the filter taps.
// NOTE(review): the kernels are implemented outside this file; the exact
// number of taps each one reads is not visible here.
typedef void filter8_1dfunction(const unsigned char *src_ptr,
                                const ptrdiff_t src_pitch,
                                unsigned char *output_ptr,
                                ptrdiff_t out_pitch,
                                unsigned int output_height,
                                const short *filter);
25
// Defines vp9_convolve8_<name>_<opt>(), a one-dimensional convolution entry
// point that hands column strips to the vp9_filter_block1d{16,8,4}_* kernels.
//
// Macro arguments:
//   name      - suffix of the generated function and of its C fallback,
//               vp9_convolve8_<name>_c.
//   step_q4   - the step parameter to test (x_step_q4 or y_step_q4).
//   filter    - the tap array to test and pass on (filter_x or filter_y).
//   dir       - direction letter pasted into the kernel names (h or v).
//   src_start - source pointer expression given to the <dir>8 kernels. It is
//               expanded at every call site, so it follows updates to src.
//   avg       - empty or avg_, pasted into the kernel names.
//   opt       - instruction-set suffix pasted into every name.
//
// The kernels are only used when step_q4 is 16 and filter[3] is not 128.
// If any of filter[0..2] is non-zero the <dir>8 kernels are called with
// src_start, otherwise the <dir>2 kernels are called with src. The block is
// consumed in strips 16, then 8, then 4 columns wide; src, dst and w are
// advanced after each strip. Whatever width remains (the whole block when the
// kernels were skipped) is passed to vp9_convolve8_<name>_c.
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
  void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                    uint8_t *dst, ptrdiff_t dst_stride, \
                                    const int16_t *filter_x, int x_step_q4, \
                                    const int16_t *filter_y, int y_step_q4, \
                                    int w, int h) { \
    if (step_q4 == 16 && filter[3] != 128) { \
      if (filter[0] || filter[1] || filter[2]) { \
        for (; w >= 16; src += 16, dst += 16, w -= 16) { \
          vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
                                                   dst, dst_stride, \
                                                   h, filter); \
        } \
        for (; w >= 8; src += 8, dst += 8, w -= 8) { \
          vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
        for (; w >= 4; src += 4, dst += 4, w -= 4) { \
          vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
      } else { \
        for (; w >= 16; src += 16, dst += 16, w -= 16) { \
          vp9_filter_block1d16_##dir##2_##avg##opt(src, src_stride, \
                                                   dst, dst_stride, \
                                                   h, filter); \
        } \
        for (; w >= 8; src += 8, dst += 8, w -= 8) { \
          vp9_filter_block1d8_##dir##2_##avg##opt(src, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
        for (; w >= 4; src += 4, dst += 4, w -= 4) { \
          vp9_filter_block1d4_##dir##2_##avg##opt(src, src_stride, \
                                                  dst, dst_stride, \
                                                  h, filter); \
        } \
      } \
    } \
    if (w) { \
      vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                               filter_x, x_step_q4, filter_y, y_step_q4, \
                               w, h); \
    } \
  }
109
// Defines vp9_convolve8_<avg><opt>(), the two-dimensional convolution entry
// point built from the 1-D functions that FUN_CONV_1D generates.
//
// Macro arguments:
//   avg - empty or avg_; applied to the vertical pass and to the C fallback.
//         The horizontal pass is always vp9_convolve8_horiz_<opt>.
//   opt - instruction-set suffix pasted into every name.
//
// The generated function asserts w <= 64 and h <= 64, which is what the
// 64-byte-pitch intermediate buffer below is sized for.
//   * Either step is not 16: everything is delegated to vp9_convolve8_<avg>c.
//   * Any of filter_x[0..2] / filter_y[0..2] is non-zero, or filter_x[3] or
//     filter_y[3] equals 128: the horizontal pass starts 3 rows above src and
//     writes h + 7 rows; the vertical pass reads from row 3 of the buffer.
//   * Otherwise: the horizontal pass starts at src and writes h + 1 rows; the
//     vertical pass reads from row 0 of the buffer.
#define FUN_CONV_2D(avg, opt) \
void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                              uint8_t *dst, ptrdiff_t dst_stride, \
                              const int16_t *filter_x, int x_step_q4, \
                              const int16_t *filter_y, int y_step_q4, \
                              int w, int h) { \
  assert(w <= 64); \
  assert(h <= 64); \
  if (x_step_q4 != 16 || y_step_q4 != 16) { \
    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
  } else if (filter_x[0] || filter_x[1] || filter_x[2] || \
             filter_x[3] == 128 || \
             filter_y[0] || filter_y[1] || filter_y[2] || \
             filter_y[3] == 128) { \
    DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \
    vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 7); \
    vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } else { \
    DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \
    vp9_convolve8_horiz_##opt(src, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 1); \
    vp9_convolve8_##avg##vert_##opt(fdata2, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } \
}
#if HAVE_AVX2
// Only the 16-wide v8/h8 kernels are declared with an avx2 suffix.
filter8_1dfunction vp9_filter_block1d16_v8_avx2;
filter8_1dfunction vp9_filter_block1d16_h8_avx2;
// Every other kernel name that FUN_CONV_1D pastes together for "avx2" is
// redirected to the SSSE3 routine by the aliases further down, so the SSSE3
// routines are declared here as well.
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
#define vp9_filter_block1d8_v2_avx2  vp9_filter_block1d8_v2_ssse3
#define vp9_filter_block1d8_h2_avx2  vp9_filter_block1d8_h2_ssse3
#define vp9_filter_block1d4_v2_avx2  vp9_filter_block1d4_v2_ssse3
#define vp9_filter_block1d4_h2_avx2  vp9_filter_block1d4_h2_ssse3

// Functions defined by the FUN_CONV_1D invocations below:
// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
//                               uint8_t *dst, ptrdiff_t dst_stride,
//                               const int16_t *filter_x, int x_step_q4,
//                               const int16_t *filter_y, int y_step_q4,
//                               int w, int h);
// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
//                              uint8_t *dst, ptrdiff_t dst_stride,
//                              const int16_t *filter_x, int x_step_q4,
//                              const int16_t *filter_y, int y_step_q4,
//                              int w, int h);
// No ';' after the invocations: each one expands to a complete function
// definition, and a stray ';' at file scope is not valid ISO C.
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2)
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2)

// Function defined by the FUN_CONV_2D invocation below:
// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
//                         uint8_t *dst, ptrdiff_t dst_stride,
//                         const int16_t *filter_x, int x_step_q4,
//                         const int16_t *filter_y, int y_step_q4,
//                         int w, int h);
FUN_CONV_2D(, avx2)
#endif
#if HAVE_SSSE3
// SSSE3 kernels with "8" in their name (v8/h8), plain and averaging variants.
filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;

// SSSE3 kernels with "2" in their name (v2/h2), plain and averaging variants.
filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;

// Functions defined by the FUN_CONV_1D invocations below:
// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                uint8_t *dst, ptrdiff_t dst_stride,
//                                const int16_t *filter_x, int x_step_q4,
//                                const int16_t *filter_y, int y_step_q4,
//                                int w, int h);
// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                               uint8_t *dst, ptrdiff_t dst_stride,
//                               const int16_t *filter_x, int x_step_q4,
//                               const int16_t *filter_y, int y_step_q4,
//                               int w, int h);
// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                    uint8_t *dst, ptrdiff_t dst_stride,
//                                    const int16_t *filter_x, int x_step_q4,
//                                    const int16_t *filter_y, int y_step_q4,
//                                    int w, int h);
// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                   uint8_t *dst, ptrdiff_t dst_stride,
//                                   const int16_t *filter_x, int x_step_q4,
//                                   const int16_t *filter_y, int y_step_q4,
//                                   int w, int h);
// No ';' after the invocations: each one expands to a complete function
// definition, and a stray ';' at file scope is not valid ISO C.
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3)
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3)
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3)
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
            ssse3)

// Functions defined by the FUN_CONV_2D invocations below:
// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                          uint8_t *dst, ptrdiff_t dst_stride,
//                          const int16_t *filter_x, int x_step_q4,
//                          const int16_t *filter_y, int y_step_q4,
//                          int w, int h);
// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                              uint8_t *dst, ptrdiff_t dst_stride,
//                              const int16_t *filter_x, int x_step_q4,
//                              const int16_t *filter_y, int y_step_q4,
//                              int w, int h);
FUN_CONV_2D(, ssse3)
FUN_CONV_2D(avg_ , ssse3)
#endif
251
#if HAVE_SSE2
// SSE2 kernels with "8" in their name (v8/h8), plain and averaging variants.
filter8_1dfunction vp9_filter_block1d16_v8_sse2;
filter8_1dfunction vp9_filter_block1d16_h8_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_sse2;
filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2;

// SSE2 kernels with "2" in their name (v2/h2), plain and averaging variants.
filter8_1dfunction vp9_filter_block1d16_v2_sse2;
filter8_1dfunction vp9_filter_block1d16_h2_sse2;
filter8_1dfunction vp9_filter_block1d8_v2_sse2;
filter8_1dfunction vp9_filter_block1d8_h2_sse2;
filter8_1dfunction vp9_filter_block1d4_v2_sse2;
filter8_1dfunction vp9_filter_block1d4_h2_sse2;
filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;

// Functions defined by the FUN_CONV_1D invocations below:
// void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                               uint8_t *dst, ptrdiff_t dst_stride,
//                               const int16_t *filter_x, int x_step_q4,
//                               const int16_t *filter_y, int y_step_q4,
//                               int w, int h);
// void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                              uint8_t *dst, ptrdiff_t dst_stride,
//                              const int16_t *filter_x, int x_step_q4,
//                              const int16_t *filter_y, int y_step_q4,
//                              int w, int h);
// void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                                   uint8_t *dst, ptrdiff_t dst_stride,
//                                   const int16_t *filter_x, int x_step_q4,
//                                   const int16_t *filter_y, int y_step_q4,
//                                   int w, int h);
// void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                                  uint8_t *dst, ptrdiff_t dst_stride,
//                                  const int16_t *filter_x, int x_step_q4,
//                                  const int16_t *filter_y, int y_step_q4,
//                                  int w, int h);
// No ';' after the invocations: each one expands to a complete function
// definition, and a stray ';' at file scope is not valid ISO C.
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2)
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2)
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2)
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2)

// Functions defined by the FUN_CONV_2D invocations below:
// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                         uint8_t *dst, ptrdiff_t dst_stride,
//                         const int16_t *filter_x, int x_step_q4,
//                         const int16_t *filter_y, int y_step_q4,
//                         int w, int h);
// void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
//                             uint8_t *dst, ptrdiff_t dst_stride,
//                             const int16_t *filter_x, int x_step_q4,
//                             const int16_t *filter_y, int y_step_q4,
//                             int w, int h);
FUN_CONV_2D(, sse2)
FUN_CONV_2D(avg_ , sse2)
#endif