/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/rotate.h"

#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".private_extern _" #name "                \n"                             \
    ".align 4,0x90                             \n"                             \
"_" #name ":                                   \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".align 4,0x90                             \n"                             \
"_" #name ":                                   \n"
#else
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".align 4,0x90                             \n"                             \
#name ":                                       \n"
#endif
#endif
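
// DECLARE_FUNCTION emits the label that opens each function defined in a raw
// asm() block below, applying the platform's symbol decoration (a leading
// underscore on 32-bit Apple, MinGW and Cygwin targets).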
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_NEON
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width);
#endif  // defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
    defined(__mips__) && \
    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width);

void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_MIPS_DSPR2
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width);
#endif  // defined(__mips__)
#if !defined(LIBYUV_DISABLE_X86) && \
    defined(_M_IX86) && defined(_MSC_VER)
#define HAS_TRANSPOSE_WX8_SSSE3
__declspec(naked) __declspec(align(16))
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  __asm {
    push      edi
    push      esi
    mov       eax, [esp + 12 + 4]   // src
    mov       edi, [esp + 12 + 8]   // src_stride
    mov       edx, [esp + 12 + 12]  // dst
    mov       esi, [esp + 12 + 16]  // dst_stride
    mov       ecx, [esp + 12 + 20]  // width
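    // Note: the [esp + 12 + N] argument offsets account for the 4-byte return
    // address plus the two registers (edi, esi) saved above, 12 bytes in all,
    // that sit between esp and the arguments in this naked function.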
    // Read in the data from the source pointer.
    // First round of bit swap.
    align     4
 convertloop:
    movq      xmm0, qword ptr [eax]
    movq      xmm1, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    movq      xmm2, qword ptr [eax]
    movq      xmm3, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    movq      xmm4, qword ptr [eax]
    palignr   xmm3, xmm3, 8
    movq      xmm5, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    movq      xmm6, qword ptr [eax]
    palignr   xmm5, xmm5, 8
    movq      xmm7, qword ptr [eax + edi]
    palignr   xmm7, xmm7, 8
    // Second round of bit swap.
    palignr   xmm2, xmm2, 8
    palignr   xmm3, xmm3, 8
    palignr   xmm6, xmm6, 8
    palignr   xmm7, xmm7, 8
    // Third round of bit swap.
    // Write to the destination pointer.
    movq      qword ptr [edx], xmm0
    palignr   xmm4, xmm4, 8
    movq      qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    palignr   xmm6, xmm6, 8
    movq      qword ptr [edx], xmm2
    movq      qword ptr [edx + esi], xmm6
    lea       edx, [edx + 2 * esi]
    movq      qword ptr [edx], xmm1
    palignr   xmm5, xmm5, 8
    movq      qword ptr [edx + esi], xmm5
    lea       edx, [edx + 2 * esi]
    movq      qword ptr [edx], xmm3
    palignr   xmm7, xmm7, 8
    movq      qword ptr [edx + esi], xmm7
    lea       edx, [edx + 2 * esi]
    sub       ecx, 8
    jg        convertloop

    pop       esi
    pop       edi
    ret
  }
}
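// The TransposeWx8 kernels implement the 8x8 byte transpose as successive
// interleaves: punpcklbw interleaves bytes of row pairs, punpcklwd 16-bit
// pairs, punpckldq 32-bit quads, while palignr rotates the high half of each
// interleave result down so it can be stored separately.  After log2(8) = 3
// rounds, each qword written out is one column of the source tile.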
#define HAS_TRANSPOSE_UVWX8_SSE2
__declspec(naked) __declspec(align(16))
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  __asm {
    push      ebx
    push      esi
    push      edi
    push      ebp
    mov       eax, [esp + 16 + 4]   // src
    mov       edi, [esp + 16 + 8]   // src_stride
    mov       edx, [esp + 16 + 12]  // dst_a
    mov       esi, [esp + 16 + 16]  // dst_stride_a
    mov       ebx, [esp + 16 + 20]  // dst_b
    mov       ebp, [esp + 16 + 24]  // dst_stride_b
    mov       ecx, esp
    sub       esp, 4 + 16
    and       esp, ~15
    mov       [esp + 16], ecx
    mov       ecx, [ecx + 16 + 28]  // w
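    // esp is now 16-byte aligned with 16 bytes of scratch reserved at [esp];
    // with only eight xmm registers available in 32-bit mode, xmm5/xmm6 are
    // spilled there during the transpose.  ecx kept the original stack
    // pointer so the final argument (w) could still be located.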
    align     4
 convertloop:
    // Read in the data from the source pointer.
    // First round of bit swap.
    movdqu    xmm1, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm0  // use xmm7 as temp register.
    movdqu    xmm3, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqu    xmm5, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqu    xmm7, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqu    [esp], xmm5  // backup xmm5
    movdqa    xmm5, xmm6  // use xmm5 as temp register.
    lea       eax, [eax + 8 * edi + 16]
    // Second round of bit swap.
    movdqu    xmm5, [esp]  // restore xmm5
    movdqu    [esp], xmm6  // backup xmm6
    movdqa    xmm6, xmm5  // use xmm6 as temp register.
    // Third round of bit swap.
    // Write to the destination pointer.
    movdqu    xmm6, [esp]  // restore xmm6
    movlpd    qword ptr [edx], xmm0
    movhpd    qword ptr [ebx], xmm0
    movlpd    qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm4
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm2  // use xmm0 as the temp register.
    movlpd    qword ptr [edx], xmm2
    movhpd    qword ptr [ebx], xmm2
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm1  // use xmm0 as the temp register.
    movlpd    qword ptr [edx], xmm1
    movhpd    qword ptr [ebx], xmm1
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm3  // use xmm0 as the temp register.
    movlpd    qword ptr [edx], xmm3
    movhpd    qword ptr [ebx], xmm3
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    sub       ecx, 8
    jg        convertloop

    mov       esp, [esp + 16]
    pop       ebp
    pop       edi
    pop       esi
    pop       ebx
    ret
  }
}
#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
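// In the UV transpose above, each xmm register ends up holding interleaved
// U/V column data: movlpd stores the low qword (the U column) to dst_a while
// movhpd stores the high qword (the V column) to dst_b, so the store pass
// also deinterleaves the two channels.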
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movq (%0),%%xmm0 \n"
    "movq (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "movq (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "movq (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movq (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "movq (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movq (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "lea 0x8(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "sub $0x8,%2 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
377 : "r"((intptr_t)(src_stride)), // %3
378 "r"((intptr_t)(dst_stride)) // %4
380 #if defined(__SSE2__)
381 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
#define HAS_TRANSPOSE_UVWX8_SSE2
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int w);
  asm (
    DECLARE_FUNCTION(TransposeUVWx8_SSE2)
    "push %ebx \n"
    "push %esi \n"
    "push %edi \n"
    "push %ebp \n"
    "mov 0x14(%esp),%eax \n"
    "mov 0x18(%esp),%edi \n"
    "mov 0x1c(%esp),%edx \n"
    "mov 0x20(%esp),%esi \n"
    "mov 0x24(%esp),%ebx \n"
    "mov 0x28(%esp),%ebp \n"
406 "and $0xfffffff0,%esp \n"
407 "mov %ecx,0x10(%esp) \n"
408 "mov 0x2c(%ecx),%ecx \n"
411 "movdqu (%eax),%xmm0 \n"
412 "movdqu (%eax,%edi,1),%xmm1 \n"
413 "lea (%eax,%edi,2),%eax \n"
414 "movdqa %xmm0,%xmm7 \n"
415 "punpcklbw %xmm1,%xmm0 \n"
416 "punpckhbw %xmm1,%xmm7 \n"
417 "movdqa %xmm7,%xmm1 \n"
418 "movdqu (%eax),%xmm2 \n"
419 "movdqu (%eax,%edi,1),%xmm3 \n"
420 "lea (%eax,%edi,2),%eax \n"
421 "movdqa %xmm2,%xmm7 \n"
422 "punpcklbw %xmm3,%xmm2 \n"
423 "punpckhbw %xmm3,%xmm7 \n"
424 "movdqa %xmm7,%xmm3 \n"
425 "movdqu (%eax),%xmm4 \n"
426 "movdqu (%eax,%edi,1),%xmm5 \n"
427 "lea (%eax,%edi,2),%eax \n"
428 "movdqa %xmm4,%xmm7 \n"
429 "punpcklbw %xmm5,%xmm4 \n"
430 "punpckhbw %xmm5,%xmm7 \n"
431 "movdqa %xmm7,%xmm5 \n"
432 "movdqu (%eax),%xmm6 \n"
433 "movdqu (%eax,%edi,1),%xmm7 \n"
434 "lea (%eax,%edi,2),%eax \n"
435 "movdqu %xmm5,(%esp) \n"
437 "movdqa %xmm6,%xmm5 \n"
438 "punpcklbw %xmm7,%xmm6 \n"
439 "punpckhbw %xmm7,%xmm5 \n"
440 "movdqa %xmm5,%xmm7 \n"
441 "lea 0x10(%eax,%edi,8),%eax \n"
443 "movdqa %xmm0,%xmm5 \n"
444 "punpcklwd %xmm2,%xmm0 \n"
445 "punpckhwd %xmm2,%xmm5 \n"
446 "movdqa %xmm5,%xmm2 \n"
447 "movdqa %xmm1,%xmm5 \n"
448 "punpcklwd %xmm3,%xmm1 \n"
449 "punpckhwd %xmm3,%xmm5 \n"
450 "movdqa %xmm5,%xmm3 \n"
451 "movdqa %xmm4,%xmm5 \n"
452 "punpcklwd %xmm6,%xmm4 \n"
453 "punpckhwd %xmm6,%xmm5 \n"
454 "movdqa %xmm5,%xmm6 \n"
455 "movdqu (%esp),%xmm5 \n"
456 "movdqu %xmm6,(%esp) \n"
457 "movdqa %xmm5,%xmm6 \n"
458 "punpcklwd %xmm7,%xmm5 \n"
459 "punpckhwd %xmm7,%xmm6 \n"
460 "movdqa %xmm6,%xmm7 \n"
461 "movdqa %xmm0,%xmm6 \n"
462 "punpckldq %xmm4,%xmm0 \n"
463 "punpckhdq %xmm4,%xmm6 \n"
464 "movdqa %xmm6,%xmm4 \n"
465 "movdqu (%esp),%xmm6 \n"
466 "movlpd %xmm0,(%edx) \n"
467 "movhpd %xmm0,(%ebx) \n"
468 "movlpd %xmm4,(%edx,%esi,1) \n"
469 "lea (%edx,%esi,2),%edx \n"
470 "movhpd %xmm4,(%ebx,%ebp,1) \n"
471 "lea (%ebx,%ebp,2),%ebx \n"
472 "movdqa %xmm2,%xmm0 \n"
473 "punpckldq %xmm6,%xmm2 \n"
474 "movlpd %xmm2,(%edx) \n"
475 "movhpd %xmm2,(%ebx) \n"
476 "punpckhdq %xmm6,%xmm0 \n"
477 "movlpd %xmm0,(%edx,%esi,1) \n"
478 "lea (%edx,%esi,2),%edx \n"
479 "movhpd %xmm0,(%ebx,%ebp,1) \n"
480 "lea (%ebx,%ebp,2),%ebx \n"
481 "movdqa %xmm1,%xmm0 \n"
482 "punpckldq %xmm5,%xmm1 \n"
483 "movlpd %xmm1,(%edx) \n"
484 "movhpd %xmm1,(%ebx) \n"
485 "punpckhdq %xmm5,%xmm0 \n"
486 "movlpd %xmm0,(%edx,%esi,1) \n"
487 "lea (%edx,%esi,2),%edx \n"
488 "movhpd %xmm0,(%ebx,%ebp,1) \n"
489 "lea (%ebx,%ebp,2),%ebx \n"
490 "movdqa %xmm3,%xmm0 \n"
491 "punpckldq %xmm7,%xmm3 \n"
492 "movlpd %xmm3,(%edx) \n"
493 "movhpd %xmm3,(%ebx) \n"
494 "punpckhdq %xmm7,%xmm0 \n"
496 "movlpd %xmm0,(%edx,%esi,1) \n"
497 "lea (%edx,%esi,2),%edx \n"
498 "movhpd %xmm0,(%ebx,%ebp,1) \n"
499 "lea (%ebx,%ebp,2),%ebx \n"
501 "mov 0x10(%esp),%esp \n"
    "pop %ebp \n"
    "pop %edi \n"
    "pop %esi \n"
    "pop %ebx \n"
#if defined(__native_client__)
    "pop %ecx \n"
    "and $0xffffffe0,%ecx \n"
    "jmp *%ecx \n"
#else
    "ret \n"
#endif
);
#endif  // defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
    defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3
static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
                                    uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movdqu (%0),%%xmm0 \n"
    "movdqu (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqu (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm9 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "palignr $0x8,%%xmm9,%%xmm9 \n"
    "movdqu (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm2,%%xmm10 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm10 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movdqa %%xmm10,%%xmm11 \n"
    "movdqu (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "movdqu (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm4,%%xmm12 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm12 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movdqa %%xmm12,%%xmm13 \n"
    "movdqu (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movdqu (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm6,%%xmm14 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "punpckhbw %%xmm7,%%xmm14 \n"
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "movdqa %%xmm14,%%xmm15 \n"
    "lea 0x10(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "punpcklwd %%xmm10,%%xmm8 \n"
    "punpcklwd %%xmm11,%%xmm9 \n"
    "movdqa %%xmm8,%%xmm10 \n"
    "movdqa %%xmm9,%%xmm11 \n"
    "palignr $0x8,%%xmm10,%%xmm10 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "punpcklwd %%xmm14,%%xmm12 \n"
    "punpcklwd %%xmm15,%%xmm13 \n"
    "movdqa %%xmm12,%%xmm14 \n"
    "movdqa %%xmm13,%%xmm15 \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm12,%%xmm8 \n"
    "movq %%xmm8,(%1) \n"
    "movdqa %%xmm8,%%xmm12 \n"
    "palignr $0x8,%%xmm12,%%xmm12 \n"
    "movq %%xmm12,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm14,%%xmm10 \n"
    "movdqa %%xmm10,%%xmm14 \n"
    "movq %%xmm10,(%1) \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "punpckldq %%xmm13,%%xmm9 \n"
    "movq %%xmm14,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm9,%%xmm13 \n"
    "movq %%xmm9,(%1) \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movq %%xmm13,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm15,%%xmm11 \n"
    "movq %%xmm11,(%1) \n"
    "movdqa %%xmm11,%%xmm15 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    "sub $0x10,%2 \n"
    "movq %%xmm15,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
649 : "r"((intptr_t)(src_stride)), // %3
650 "r"((intptr_t)(dst_stride)) // %4
652 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
653 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
#define HAS_TRANSPOSE_UVWX8_SSE2
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
    "1: \n"
    "movdqu (%0),%%xmm0 \n"
    "movdqu (%0,%4),%%xmm1 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm1 \n"
    "movdqu (%0),%%xmm2 \n"
    "movdqu (%0,%4),%%xmm3 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm3 \n"
    "movdqu (%0),%%xmm4 \n"
    "movdqu (%0,%4),%%xmm5 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm5 \n"
    "movdqu (%0),%%xmm6 \n"
    "movdqu (%0,%4),%%xmm7 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm6,%%xmm8 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "neg %4 \n"
    "lea 0x10(%0,%4,8),%0 \n"
    "punpckhbw %%xmm7,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm7 \n"
    "neg %4 \n"
    // Second round of bit swap.
    "movdqa %%xmm0,%%xmm8 \n"
    "movdqa %%xmm1,%%xmm9 \n"
    "punpckhwd %%xmm2,%%xmm8 \n"
    "punpckhwd %%xmm3,%%xmm9 \n"
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm2 \n"
    "movdqa %%xmm9,%%xmm3 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "movdqa %%xmm5,%%xmm9 \n"
    "punpckhwd %%xmm6,%%xmm8 \n"
    "punpckhwd %%xmm7,%%xmm9 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm8,%%xmm6 \n"
    "movdqa %%xmm9,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "movdqa %%xmm0,%%xmm8 \n"
    "punpckldq %%xmm4,%%xmm0 \n"
    "movlpd %%xmm0,(%1) \n"  // Write back U channel
    "movhpd %%xmm0,(%2) \n"  // Write back V channel
    "punpckhdq %%xmm4,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movlpd %%xmm2,(%1) \n"
    "movhpd %%xmm2,(%2) \n"
    "punpckhdq %%xmm6,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm1,%%xmm8 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movlpd %%xmm1,(%1) \n"
    "movhpd %%xmm1,(%2) \n"
    "punpckhdq %%xmm5,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm3,%%xmm8 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movlpd %%xmm3,(%1) \n"
    "movhpd %%xmm3,(%2) \n"
    "punpckhdq %%xmm7,%%xmm8 \n"
    "sub $0x8,%3 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "jg 1b \n"
759 : "r"((intptr_t)(src_stride)), // %4
760 "r"((intptr_t)(dst_stride_a)), // %5
761 "r"((intptr_t)(dst_stride_b)) // %6
763 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
static void TransposeWx8_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}
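
// Reference behavior: each iteration gathers one source column (eight bytes
// that are src_stride apart) and stores it as one contiguous destination
// row, so iteration i writes source column i into destination row i.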
static void TransposeWxH_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width, int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  int i = height;
  void (*TransposeWx8)(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride,
                       int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSE_WX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeWx8 = TransposeWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    TransposeWx8 = TransposeWx8_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
    TransposeWx8 = TransposeWx8_FAST_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    if (IS_ALIGNED(width, 4) &&
        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
      TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
    } else {
      TransposeWx8 = TransposeWx8_MIPS_DSPR2;
    }
  }
#endif

  // Work across the source in 8x8 tiles.
  while (i >= 8) {
    TransposeWx8(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst += 8;               // Move over 8 columns.
    i -= 8;
  }

  // Transpose any remaining rows.
  TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
}
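
// Dispatch pattern: the best row-transpose kernel is chosen once per call
// from CPU flags and width alignment, the bulk of the image is processed 8
// rows at a time, and the scalar TransposeWxH_C handles the final partial
// tile of fewer than 8 rows.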
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
                   uint8* dst, int dst_stride,
                   int width, int height) {
  // Rotate by 90 is a transpose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
  src += src_stride * (height - 1);
  src_stride = -src_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
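
// In coordinates this is a clockwise rotation:
//   dst(r, c) = src(height - 1 - c, r),
// so the bottom-left source pixel becomes the top-left destination pixel.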
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Rotate by 270 is a transpose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
  dst += dst_stride * (width - 1);
  dst_stride = -dst_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
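
// In coordinates this is a counter-clockwise rotation:
//   dst(r, c) = src(c, width - 1 - r),
// so the top-right source pixel becomes the top-left destination pixel.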
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Swap first and last row and mirror the content. Uses a temporary row.
  align_buffer_64(row, width);
  const uint8* src_bot = src + src_stride * (height - 1);
  uint8* dst_bot = dst + dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_SSE2;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    MirrorRow = MirrorRow_AVX2;
  }
#endif
// TODO(fbarchard): Make the MIPS MirrorRow handle unaligned memory.
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
    MirrorRow = MirrorRow_MIPS_DSPR2;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX) && IS_ALIGNED(width, 64)) {
    CopyRow = CopyRow_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Odd height will harmlessly mirror the middle row twice.
  for (y = 0; y < half_height; ++y) {
    MirrorRow(src, row, width);  // Mirror first row into a buffer
    src += src_stride;
    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
    dst += dst_stride;
    CopyRow(row, dst_bot, width);  // Copy first mirrored row into last
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
  free_aligned_buffer_64(row);
}
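
// A 180-degree rotation is a vertical flip combined with a horizontal
// mirror, done here two rows at a time from the outside in.  The temporary
// row lets the top and bottom rows be swapped without either write
// clobbering data still to be read (presumably to keep in-place use safe).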
static void TransposeUVWx8_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}
static void TransposeUVWxH_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width, int height) {
  int i;
  for (i = 0; i < width * 2; i += 2) {
    int j;
    for (j = 0; j < height; ++j) {
      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
    }
  }
}
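
// The UV variants consume an interleaved UVUVUV... source: even source bytes
// (U) are transposed into dst_a and odd source bytes (V) into dst_b, so the
// transpose and the UV deinterleave happen in one pass.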
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i = height;
  void (*TransposeUVWx8)(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSE_UVWX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeUVWx8 = TransposeUVWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSE_UVWX8_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
    TransposeUVWx8 = TransposeUVWx8_SSE2;
  }
#endif
#if defined(HAS_TRANSPOSE_UVWX8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
  }
#endif

  // Work through the source in 8x8 tiles.
  while (i >= 8) {
    TransposeUVWx8(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst_a += 8;             // Move over 8 columns.
    dst_b += 8;             // Move over 8 columns.
    i -= 8;
  }

  TransposeUVWxH_C(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width, i);
}
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
                uint8* dst_a, int dst_stride_a,
                uint8* dst_b, int dst_stride_b,
                int width, int height) {
  src += src_stride * (height - 1);
  src_stride = -src_stride;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  dst_a += dst_stride_a * (width - 1);
  dst_b += dst_stride_b * (width - 1);
  dst_stride_a = -dst_stride_a;
  dst_stride_b = -dst_stride_b;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}
// Rotate 180 is a horizontal and vertical flip.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i;
  void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
      MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    MirrorRowUV = MirrorUVRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_UV_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
    MirrorRowUV = MirrorUVRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
  }
#endif

  dst_a += dst_stride_a * (height - 1);
  dst_b += dst_stride_b * (height - 1);

  for (i = 0; i < height; ++i) {
    MirrorRowUV(src, dst_a, dst_b, width);
    src += src_stride;
    dst_a -= dst_stride_a;
    dst_b -= dst_stride_b;
  }
}
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
                uint8* dst, int dst_stride,
                int width, int height,
                enum RotationMode mode) {
  if (!src || width <= 0 || height == 0 || !dst) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      CopyPlane(src, src_stride,
                dst, dst_stride,
                width, height);
      return 0;
    case kRotate90:
      RotatePlane90(src, src_stride,
                    dst, dst_stride,
                    width, height);
      return 0;
    case kRotate270:
      RotatePlane270(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    case kRotate180:
      RotatePlane180(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    default:
      break;
  }
  return -1;
}
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return I420Copy(src_y, src_stride_y,
                      src_u, src_stride_u,
                      src_v, src_stride_v,
                      dst_y, dst_stride_y,
                      dst_u, dst_stride_u,
                      dst_v, dst_stride_v,
                      width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotatePlane90(src_u, src_stride_u,
                    dst_u, dst_stride_u,
                    halfwidth, halfheight);
      RotatePlane90(src_v, src_stride_v,
                    dst_v, dst_stride_v,
                    halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane270(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane270(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane180(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane180(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
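
// Usage sketch (the sizes here are illustrative, not part of this file):
// rotating a 640x360 I420 frame by 90 degrees yields a 360x640 frame, so the
// destination strides follow the rotated dimensions:
//
//   uint8 dst_y[360 * 640], dst_u[180 * 320], dst_v[180 * 320];
//   I420Rotate(src_y, 640, src_u, 320, src_v, 320,
//              dst_y, 360, dst_u, 180, dst_v, 180,
//              640, 360, kRotate90);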
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
                     const uint8* src_uv, int src_stride_uv,
                     uint8* dst_y, int dst_stride_y,
                     uint8* dst_u, int dst_stride_u,
                     uint8* dst_v, int dst_stride_v,
                     int width, int height,
                     enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_uv || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return NV12ToI420(src_y, src_stride_y,
                        src_uv, src_stride_uv,
                        dst_y, dst_stride_y,
                        dst_u, dst_stride_u,
                        dst_v, dst_stride_v,
                        width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotateUV90(src_uv, src_stride_uv,
                 dst_u, dst_stride_u,
                 dst_v, dst_stride_v,
                 halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV270(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV180(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
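
// NV12 stores chroma as a single interleaved UV plane, so the RotateUV*
// helpers rotate and deinterleave in one step, producing the separate I420
// U and V planes directly.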
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif