Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / ffmpeg / libavcodec / x86 / qpeldsp_init.c
1 /*
2  * quarterpel DSP functions
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
32 #include "fpel.h"
33
/*
 * Two-source ("l2") pixel helpers for 8- and 16-pixel-wide blocks.
 *
 * Only the prototypes appear here; the bodies are not part of this file
 * section (NOTE(review): presumably the external assembly implementations
 * built when HAVE_YASM is set - confirm).
 *
 *   dst        - output block
 *   src1, src2 - the two input blocks that are combined
 *   dstStride  - row stride of dst
 *   src1Stride - row stride of src1
 *   h          - row count; the QPEL_OP call sites below pass 8 or 9 to the
 *                8-wide variants and 16 or 17 to the 16-wide variants
 *
 * There is no stride argument for src2. Every QPEL_OP call site below passes
 * as src2 a local scratch buffer that was filled with a row stride equal to
 * the block width (8 or 16).
 * NOTE(review): presumably the implementations hard-code that src2 stride -
 * confirm against the implementation before passing any other buffer.
 *
 * The strides are declared as int, while the QPEL_OP wrappers hold their
 * stride as ptrdiff_t, so the value is converted to int at each call.
 *
 * NOTE(review): put / avg / put_no_rnd presumably select "store",
 * "average with the existing dst" and "store without rounding" - confirm.
 */
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
35                               const uint8_t *src1, const uint8_t *src2,
36                               int dstStride, int src1Stride, int h);
37 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
38                                      const uint8_t *src1, const uint8_t *src2,
39                                      int dstStride, int src1Stride, int h);
40 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
41                               const uint8_t *src1, const uint8_t *src2,
42                               int dstStride, int src1Stride, int h);
43 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
44                                const uint8_t *src1, const uint8_t *src2,
45                                int dstStride, int src1Stride, int h);
46 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
47                                const uint8_t *src1, const uint8_t *src2,
48                                int dstStride, int src1Stride, int h);
49 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
50                                       const uint8_t *src1, const uint8_t *src2,
51                                       int dstStride, int src1Stride, int h);
/*
 * "h_lowpass" helpers for 16- and 8-pixel-wide blocks, in put / avg /
 * put_no_rnd flavours. Prototypes only; the bodies are not part of this
 * file section.
 *
 *   dst       - output block
 *   src       - input block
 *   dstStride - row stride of dst
 *   srcStride - row stride of src
 *   h         - row count
 *
 * The QPEL_OP wrappers below call these with h equal to the block height
 * (8 or 16) when the result is used directly, and with one extra row
 * (9 or 17) when the result is afterwards fed to the matching v_lowpass
 * helper.
 *
 * NOTE(review): going by the name, this is the horizontal MPEG-4
 * quarter-pel interpolation filter - confirm against the implementation.
 */
52 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53                                           int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
55                                           int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
57                                                  const uint8_t *src,
58                                                  int dstStride, int srcStride,
59                                                  int h);
60 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61                                          int dstStride, int srcStride, int h);
62 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
63                                          int dstStride, int srcStride, int h);
64 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
65                                                 const uint8_t *src,
66                                                 int dstStride, int srcStride,
67                                                 int h);
/*
 * "v_lowpass" helpers for 16- and 8-pixel-wide blocks, in put / avg /
 * put_no_rnd flavours. Prototypes only; the bodies are not part of this
 * file section.
 *
 *   dst       - output block
 *   src       - input block
 *   dstStride - row stride of dst
 *   srcStride - row stride of src
 *
 * Unlike the h_lowpass helpers there is no row-count argument.
 * When the QPEL_OP wrappers below feed these from an intermediate buffer,
 * that buffer was filled with 9 rows (8-wide) or 17 rows (16-wide).
 * NOTE(review): presumably the filter reads one row beyond the block
 * height - confirm against the implementation before shrinking any
 * scratch buffer.
 *
 * NOTE(review): going by the name, this is the vertical MPEG-4
 * quarter-pel interpolation filter - confirm.
 */
68 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69                                           int dstStride, int srcStride);
70 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
71                                           int dstStride, int srcStride);
72 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
73                                                  const uint8_t *src,
74                                                  int dstStride, int srcStride);
75 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76                                          int dstStride, int srcStride);
77 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
78                                          int dstStride, int srcStride);
79 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
80                                                 const uint8_t *src,
81                                                 int dstStride, int srcStride);
/*
 * Name aliases: the "put_no_rnd" full-block helpers for the mmxext suffix
 * resolve to the ff_put_pixels{16,8}_mmx functions.
 *
 * QPEL_OP builds the callee of its mc00 wrappers by token pasting
 * (ff_ ## OPNAME ## pixelsN_ ## MMX), so these names are what that pasting
 * yields for OPNAME = put_no_rnd_ and MMX = mmxext.
 * NOTE(review): the QPEL_OP instantiations are not visible in this
 * section - confirm these are the names they produce.
 *
 * ff_put_pixels{16,8}_mmx are not declared in this section.
 * NOTE(review): presumably they come from "fpel.h", and the aliasing is
 * valid because a plain copy has no rounding step - confirm.
 */
82 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
83 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
84
85 #if HAVE_YASM
86
/*
 * Name aliases, only defined when HAVE_YASM is set: the "put" full-block
 * helpers for the mmxext suffix resolve to the ff_put_pixels{16,8}_mmx
 * functions.
 *
 * QPEL_OP builds the callee of its mc00 wrappers by token pasting
 * (ff_ ## OPNAME ## pixelsN_ ## MMX), so these names are what that pasting
 * yields for OPNAME = put_ and MMX = mmxext.
 * NOTE(review): the QPEL_OP instantiations are not visible in this
 * section - confirm these are the names they produce.
 */
87 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
88 #define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
89
90 #define QPEL_OP(OPNAME, RND, MMX)                                       \
91 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst,                  \
92                                          const uint8_t *src,            \
93                                          ptrdiff_t stride)              \
94 {                                                                       \
95     ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
96 }                                                                       \
97                                                                         \
98 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst,                  \
99                                          const uint8_t *src,            \
100                                          ptrdiff_t stride)              \
101 {                                                                       \
102     uint64_t temp[8];                                                   \
103     uint8_t *const half = (uint8_t *) temp;                             \
104     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
105                                                    stride, 8);          \
106     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
107                                         stride, stride, 8);             \
108 }                                                                       \
109                                                                         \
110 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst,                  \
111                                          const uint8_t *src,            \
112                                          ptrdiff_t stride)              \
113 {                                                                       \
114     ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
115                                                    stride, 8);          \
116 }                                                                       \
117                                                                         \
118 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst,                  \
119                                          const uint8_t *src,            \
120                                          ptrdiff_t stride)              \
121 {                                                                       \
122     uint64_t temp[8];                                                   \
123     uint8_t *const half = (uint8_t *) temp;                             \
124     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
125                                                    stride, 8);          \
126     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
127                                         stride, 8);                     \
128 }                                                                       \
129                                                                         \
130 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst,                  \
131                                          const uint8_t *src,            \
132                                          ptrdiff_t stride)              \
133 {                                                                       \
134     uint64_t temp[8];                                                   \
135     uint8_t *const half = (uint8_t *) temp;                             \
136     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
137                                                    8, stride);          \
138     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
139                                         stride, stride, 8);             \
140 }                                                                       \
141                                                                         \
142 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst,                  \
143                                          const uint8_t *src,            \
144                                          ptrdiff_t stride)              \
145 {                                                                       \
146     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
147                                                    stride, stride);     \
148 }                                                                       \
149                                                                         \
150 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst,                  \
151                                          const uint8_t *src,            \
152                                          ptrdiff_t stride)              \
153 {                                                                       \
154     uint64_t temp[8];                                                   \
155     uint8_t *const half = (uint8_t *) temp;                             \
156     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
157                                                    8, stride);          \
158     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
159                                         stride, 8);                     \
160 }                                                                       \
161                                                                         \
162 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst,                  \
163                                          const uint8_t *src,            \
164                                          ptrdiff_t stride)              \
165 {                                                                       \
166     uint64_t half[8 + 9];                                               \
167     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
168     uint8_t *const halfHV = (uint8_t *) half;                           \
169     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
170                                                    stride, 9);          \
171     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
172                                         stride, 9);                     \
173     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
174     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
175                                         stride, 8, 8);                  \
176 }                                                                       \
177                                                                         \
178 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst,                  \
179                                          const uint8_t *src,            \
180                                          ptrdiff_t stride)              \
181 {                                                                       \
182     uint64_t half[8 + 9];                                               \
183     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
184     uint8_t *const halfHV = (uint8_t *) half;                           \
185     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
186                                                    stride, 9);          \
187     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
188                                         stride, 9);                     \
189     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
190     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
191                                         stride, 8, 8);                  \
192 }                                                                       \
193                                                                         \
194 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst,                  \
195                                          const uint8_t *src,            \
196                                          ptrdiff_t stride)              \
197 {                                                                       \
198     uint64_t half[8 + 9];                                               \
199     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
200     uint8_t *const halfHV = (uint8_t *) half;                           \
201     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
202                                                    stride, 9);          \
203     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
204                                         stride, 9);                     \
205     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
206     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
207                                         stride, 8, 8);                  \
208 }                                                                       \
209                                                                         \
210 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst,                  \
211                                          const uint8_t *src,            \
212                                          ptrdiff_t stride)              \
213 {                                                                       \
214     uint64_t half[8 + 9];                                               \
215     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
216     uint8_t *const halfHV = (uint8_t *) half;                           \
217     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
218                                                    stride, 9);          \
219     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
220                                         stride, 9);                     \
221     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
222     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
223                                         stride, 8, 8);                  \
224 }                                                                       \
225                                                                         \
226 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst,                  \
227                                          const uint8_t *src,            \
228                                          ptrdiff_t stride)              \
229 {                                                                       \
230     uint64_t half[8 + 9];                                               \
231     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
232     uint8_t *const halfHV = (uint8_t *) half;                           \
233     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
234                                                    stride, 9);          \
235     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
236     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
237                                         stride, 8, 8);                  \
238 }                                                                       \
239                                                                         \
240 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst,                  \
241                                          const uint8_t *src,            \
242                                          ptrdiff_t stride)              \
243 {                                                                       \
244     uint64_t half[8 + 9];                                               \
245     uint8_t *const halfH  = (uint8_t *) half + 64;                      \
246     uint8_t *const halfHV = (uint8_t *) half;                           \
247     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
248                                                    stride, 9);          \
249     ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
250     ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
251                                         stride, 8, 8);                  \
252 }                                                                       \
253                                                                         \
254 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst,                  \
255                                          const uint8_t *src,            \
256                                          ptrdiff_t stride)              \
257 {                                                                       \
258     uint64_t half[8 + 9];                                               \
259     uint8_t *const halfH = (uint8_t *) half;                            \
260     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
261                                                    stride, 9);          \
262     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
263                                         8, stride, 9);                  \
264     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
265                                                    stride, 8);          \
266 }                                                                       \
267                                                                         \
268 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst,                  \
269                                          const uint8_t *src,            \
270                                          ptrdiff_t stride)              \
271 {                                                                       \
272     uint64_t half[8 + 9];                                               \
273     uint8_t *const halfH = (uint8_t *) half;                            \
274     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
275                                                    stride, 9);          \
276     ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
277                                         stride, 9);                     \
278     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
279                                                    stride, 8);          \
280 }                                                                       \
281                                                                         \
282 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst,                  \
283                                          const uint8_t *src,            \
284                                          ptrdiff_t stride)              \
285 {                                                                       \
286     uint64_t half[9];                                                   \
287     uint8_t *const halfH = (uint8_t *) half;                            \
288     ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
289                                                    stride, 9);          \
290     ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
291                                                    stride, 8);          \
292 }                                                                       \
293                                                                         \
294 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst,                 \
295                                           const uint8_t *src,           \
296                                           ptrdiff_t stride)             \
297 {                                                                       \
298     ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
299 }                                                                       \
300                                                                         \
301 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst,                 \
302                                           const uint8_t *src,           \
303                                           ptrdiff_t stride)             \
304 {                                                                       \
305     uint64_t temp[32];                                                  \
306     uint8_t *const half = (uint8_t *) temp;                             \
307     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
308                                                     stride, 16);        \
309     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
310                                          stride, 16);                   \
311 }                                                                       \
312                                                                         \
313 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst,                 \
314                                           const uint8_t *src,           \
315                                           ptrdiff_t stride)             \
316 {                                                                       \
317     ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
318                                                     stride, stride, 16);\
319 }                                                                       \
320                                                                         \
321 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst,                 \
322                                           const uint8_t *src,           \
323                                           ptrdiff_t stride)             \
324 {                                                                       \
325     uint64_t temp[32];                                                  \
326     uint8_t *const half = (uint8_t*) temp;                              \
327     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
328                                                     stride, 16);        \
329     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
330                                          stride, stride, 16);           \
331 }                                                                       \
332                                                                         \
333 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst,                 \
334                                           const uint8_t *src,           \
335                                           ptrdiff_t stride)             \
336 {                                                                       \
337     uint64_t temp[32];                                                  \
338     uint8_t *const half = (uint8_t *) temp;                             \
339     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
340                                                     stride);            \
341     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
342                                          stride, 16);                   \
343 }                                                                       \
344                                                                         \
345 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst,                 \
346                                           const uint8_t *src,           \
347                                           ptrdiff_t stride)             \
348 {                                                                       \
349     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
350                                                     stride, stride);    \
351 }                                                                       \
352                                                                         \
353 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst,                 \
354                                           const uint8_t *src,           \
355                                           ptrdiff_t stride)             \
356 {                                                                       \
357     uint64_t temp[32];                                                  \
358     uint8_t *const half = (uint8_t *) temp;                             \
359     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
360                                                     stride);            \
361     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
362                                          stride, stride, 16);           \
363 }                                                                       \
364                                                                         \
365 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst,                 \
366                                           const uint8_t *src,           \
367                                           ptrdiff_t stride)             \
368 {                                                                       \
369     uint64_t half[16 * 2 + 17 * 2];                                     \
370     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
371     uint8_t *const halfHV = (uint8_t *) half;                           \
372     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
373                                                     stride, 17);        \
374     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
375                                          stride, 17);                   \
376     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
377                                                     16, 16);            \
378     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
379                                          stride, 16, 16);               \
380 }                                                                       \
381                                                                         \
382 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst,                 \
383                                           const uint8_t *src,           \
384                                           ptrdiff_t stride)             \
385 {                                                                       \
386     uint64_t half[16 * 2 + 17 * 2];                                     \
387     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
388     uint8_t *const halfHV = (uint8_t *) half;                           \
389     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
390                                                     stride, 17);        \
391     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
392                                          stride, 17);                   \
393     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
394                                                     16, 16);            \
395     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
396                                          stride, 16, 16);               \
397 }                                                                       \
398                                                                         \
399 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst,                 \
400                                           const uint8_t *src,           \
401                                           ptrdiff_t stride)             \
402 {                                                                       \
403     uint64_t half[16 * 2 + 17 * 2];                                     \
404     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
405     uint8_t *const halfHV = (uint8_t *) half;                           \
406     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
407                                                     stride, 17);        \
408     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
409                                          stride, 17);                   \
410     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
411                                                     16, 16);            \
412     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
413                                          stride, 16, 16);               \
414 }                                                                       \
415                                                                         \
416 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst,                 \
417                                           const uint8_t *src,           \
418                                           ptrdiff_t stride)             \
419 {                                                                       \
420     uint64_t half[16 * 2 + 17 * 2];                                     \
421     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
422     uint8_t *const halfHV = (uint8_t *) half;                           \
423     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
424                                                     stride, 17);        \
425     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
426                                          stride, 17);                   \
427     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
428                                                     16, 16);            \
429     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
430                                          stride, 16, 16);               \
431 }                                                                       \
432                                                                         \
433 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst,                 \
434                                           const uint8_t *src,           \
435                                           ptrdiff_t stride)             \
436 {                                                                       \
437     uint64_t half[16 * 2 + 17 * 2];                                     \
438     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
439     uint8_t *const halfHV = (uint8_t *) half;                           \
440     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
441                                                     stride, 17);        \
442     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
443                                                     16, 16);            \
444     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
445                                          stride, 16, 16);               \
446 }                                                                       \
447                                                                         \
448 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst,                 \
449                                           const uint8_t *src,           \
450                                           ptrdiff_t stride)             \
451 {                                                                       \
452     uint64_t half[16 * 2 + 17 * 2];                                     \
453     uint8_t *const halfH  = (uint8_t *) half + 256;                     \
454     uint8_t *const halfHV = (uint8_t *) half;                           \
455     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
456                                                     stride, 17);        \
457     ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
458                                                     16, 16);            \
459     ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
460                                          stride, 16, 16);               \
461 }                                                                       \
462                                                                         \
463 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst,                 \
464                                           const uint8_t *src,           \
465                                           ptrdiff_t stride)             \
466 {                                                                       \
467     uint64_t half[17 * 2];                                              \
468     uint8_t *const halfH = (uint8_t *) half;                            \
469     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
470                                                     stride, 17);        \
471     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
472                                          stride, 17);                   \
473     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
474                                                     stride, 16);        \
475 }                                                                       \
476                                                                         \
477 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst,                 \
478                                           const uint8_t *src,           \
479                                           ptrdiff_t stride)             \
480 {                                                                       \
481     uint64_t half[17 * 2];                                              \
482     uint8_t *const halfH = (uint8_t *) half;                            \
483     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
484                                                     stride, 17);        \
485     ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
486                                          stride, 17);                   \
487     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
488                                                     stride, 16);        \
489 }                                                                       \
490                                                                         \
491 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst,                 \
492                                           const uint8_t *src,           \
493                                           ptrdiff_t stride)             \
494 {                                                                       \
495     uint64_t half[17 * 2];                                              \
496     uint8_t *const halfH = (uint8_t *) half;                            \
497     ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
498                                                     stride, 17);        \
499     ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
500                                                     stride, 16);        \
501 }
502
/*
 * Instantiate the QPEL_OP function family three times for MMXEXT.
 *
 * As used in the macro body, the first argument (OPNAME) is pasted in front
 * of every generated static function name (e.g. put_qpel16_mc22_mmxext) and
 * selects the ff_<OPNAME>pixels16_l2_* / ff_<OPNAME>mpeg4_qpel16_v_lowpass_*
 * helper that writes to dst.  The second (RND) selects which ff_put<RND>...
 * helpers produce the intermediate half-pel buffers: "_" gives the plain
 * ff_put_* helpers, "_no_rnd_" gives the ff_put_no_rnd_* ones.  The third
 * (MMX) is the CPU suffix appended to every name.
 *
 * Note that the avg_ family passes "_" as RND, so only its final store goes
 * through an ff_avg_* helper.
 *
 * NOTE(review): the argument roles above are inferred from how
 * OPNAME/RND/MMX are used inside the macro body; confirm against the
 * parameter list on the #define QPEL_OP line.
 */
503 QPEL_OP(put_,        _,        mmxext)
504 QPEL_OP(avg_,        _,        mmxext)
505 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
506
507 #endif /* HAVE_YASM */
508
/*
 * Fill all 16 entries of c->PFX_pixels_tab[IDX] with function pointers.
 *
 * The function stored for each entry is named by token pasting:
 *     PREFIX PFX SIZE _mcXY_ CPU
 * e.g. SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ) stores
 * put_qpel16_mc00_mmxext ... put_qpel16_mc33_mmxext.
 *
 * Entry layout: the function whose name ends in _mcXY_ is stored at index
 * X + 4 * Y (mc00 -> 0, mc10 -> 1, ..., mc01 -> 4, ..., mc33 -> 15).
 *
 * PFX    - table name stem and function name stem (e.g. put_qpel)
 * IDX    - first index into the table
 * SIZE   - size token pasted into the function name (e.g. 16 or 8)
 * CPU    - CPU suffix pasted at the end of the function name
 * PREFIX - optional token pasted in front of the name; may be left empty
 *
 * The macro is not self-contained: it dereferences a variable named `c`
 * that must be in scope at the expansion site.  It is wrapped in
 * do { } while (0) so that it behaves as a single statement and needs a
 * trailing semicolon at the call site.
 */
509 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
510 do {                                                                         \
511     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
512     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
513     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
514     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
515     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
516     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
517     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
518     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
519     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
520     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
521     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
522     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
523     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
524     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
525     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
526     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
527 } while (0)
528
/**
 * Install the x86 MMXEXT quarter-pel functions into a QpelDSPContext.
 *
 * The CPU flags are read once via av_get_cpu_flags().  When X86_MMXEXT()
 * accepts them and the build defines HAVE_MMXEXT_EXTERNAL to a non-zero
 * value, the avg_qpel, put_qpel and put_no_rnd_qpel pixel tables are
 * overwritten: row 0 receives the sixteen *_qpel16_mcXY_mmxext functions and
 * row 1 the sixteen *_qpel8_mcXY_mmxext functions.  In every other case this
 * function writes nothing to the context.
 *
 * @param c context whose *_pixels_tab rows 0 and 1 are filled in
 */
529 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
530 {
531     int cpu_flags = av_get_cpu_flags();
532
533     if (X86_MMXEXT(cpu_flags)) {
534 #if HAVE_MMXEXT_EXTERNAL
        /* The trailing empty argument is SET_QPEL_FUNCS' PREFIX: the
         * installed function names carry no extra prefix. */
535         SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
536         SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
537
538         SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
539         SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
540         SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
541         SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
542 #endif /* HAVE_MMXEXT_EXTERNAL */
543     }
544 }