/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/rotate.h"

#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
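// The DECLARE_FUNCTION macro below supplies the assembler boilerplate
// (section, symbol visibility, alignment and entry label) needed to define a
// function inside a file-scope asm() block; the 32 bit gcc version of
// TransposeUVWx8_SSE2 further down is written that way.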
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".private_extern _" #name "                \n"                             \
    ".align 4,0x90                             \n"                             \
"_" #name ":                                   \n"
#elif (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(__i386__)
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".align 4,0x90                             \n"                             \
"_" #name ":                                   \n"
#else
#define DECLARE_FUNCTION(name)                                                 \
    ".text                                     \n"                             \
    ".align 4,0x90                             \n"                             \
#name ":                                       \n"
#endif
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_MIRRORUVROW_NEON
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_NEON
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width);
#endif  // defined(__ARM_NEON__)
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
    defined(__mips__) && \
    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_WX8_FAST_MIPS_DSPR2
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_MIPS_DSPR2
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width);
#endif  // defined(__mips__)
#if !defined(LIBYUV_DISABLE_X86) && \
    defined(_M_IX86) && defined(_MSC_VER)
#define HAS_TRANSPOSE_WX8_SSSE3
__declspec(naked) __declspec(align(16))
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  __asm {
    push      edi
    push      esi
    push      ebp
    mov       eax, [esp + 12 + 4]   // src
    mov       edi, [esp + 12 + 8]   // src_stride
    mov       edx, [esp + 12 + 12]  // dst
    mov       esi, [esp + 12 + 16]  // dst_stride
    mov       ecx, [esp + 12 + 20]  // width

    // Read in the data from the source pointer.
    // First round of bit swap.
    align      4
 convertloop:
    movq      xmm0, qword ptr [eax]
    lea       ebp, [eax + 8]
    movq      xmm1, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    punpcklbw xmm0, xmm1
    movq      xmm2, qword ptr [eax]
    movdqa    xmm1, xmm0
    palignr   xmm1, xmm1, 8
    movq      xmm3, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    punpcklbw xmm2, xmm3
    movdqa    xmm3, xmm2
    movq      xmm4, qword ptr [eax]
    palignr   xmm3, xmm3, 8
    movq      xmm5, qword ptr [eax + edi]
    punpcklbw xmm4, xmm5
    lea       eax, [eax + 2 * edi]
    movdqa    xmm5, xmm4
    movq      xmm6, qword ptr [eax]
    palignr   xmm5, xmm5, 8
    movq      xmm7, qword ptr [eax + edi]
    mov       eax, ebp
    punpcklbw xmm6, xmm7
    movdqa    xmm7, xmm6
    palignr   xmm7, xmm7, 8
    // Second round of bit swap.
    punpcklwd xmm0, xmm2
    punpcklwd xmm1, xmm3
    movdqa    xmm2, xmm0
    movdqa    xmm3, xmm1
    palignr   xmm2, xmm2, 8
    palignr   xmm3, xmm3, 8
    punpcklwd xmm4, xmm6
    punpcklwd xmm5, xmm7
    movdqa    xmm6, xmm4
    movdqa    xmm7, xmm5
    palignr   xmm6, xmm6, 8
    palignr   xmm7, xmm7, 8
    // Third round of bit swap.
    // Write to the destination pointer.
    punpckldq xmm0, xmm4
    movq      qword ptr [edx], xmm0
    movdqa    xmm4, xmm0
    palignr   xmm4, xmm4, 8
    movq      qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    punpckldq xmm2, xmm6
    movdqa    xmm6, xmm2
    palignr   xmm6, xmm6, 8
    movq      qword ptr [edx], xmm2
    punpckldq xmm1, xmm5
    movq      qword ptr [edx + esi], xmm6
    lea       edx, [edx + 2 * esi]
    movdqa    xmm5, xmm1
    movq      qword ptr [edx], xmm1
    palignr   xmm5, xmm5, 8
    punpckldq xmm3, xmm7
    movq      qword ptr [edx + esi], xmm5
    lea       edx, [edx + 2 * esi]
    movq      qword ptr [edx], xmm3
    movdqa    xmm7, xmm3
    palignr   xmm7, xmm7, 8
    sub       ecx, 8
    movq      qword ptr [edx + esi], xmm7
    lea       edx, [edx + 2 * esi]
    jg        convertloop

    pop       ebp
    pop       esi
    pop       edi
    ret
  }
}
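// The three rounds above implement an 8x8 byte transpose entirely in
// registers: round one interleaves bytes of adjacent rows (punpcklbw), round
// two interleaves 16 bit pairs (punpcklwd), and round three interleaves
// 32 bit pairs (punpckldq), leaving each output row in a qword. palignr of a
// register with itself rotates the upper qword down so both halves can be
// stored with movq.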
#define HAS_TRANSPOSE_UVWX8_SSE2
__declspec(naked) __declspec(align(16))
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  __asm {
    push      ebx
    push      esi
    push      edi
    push      ebp
    mov       eax, [esp + 16 + 4]   // src
    mov       edi, [esp + 16 + 8]   // src_stride
    mov       edx, [esp + 16 + 12]  // dst_a
    mov       esi, [esp + 16 + 16]  // dst_stride_a
    mov       ebx, [esp + 16 + 20]  // dst_b
    mov       ebp, [esp + 16 + 24]  // dst_stride_b
    mov       ecx, esp
    sub       esp, 4 + 16
    and       esp, ~15
    mov       [esp + 16], ecx
    mov       ecx, [ecx + 16 + 28]  // w

    align      4
 convertloop:
    // Read in the data from the source pointer.
    // First round of bit swap.
    movdqa    xmm0, [eax]
    movdqa    xmm1, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm0  // use xmm7 as temp register.
    punpcklbw xmm0, xmm1
    punpckhbw xmm7, xmm1
    movdqa    xmm1, xmm7
    movdqa    xmm2, [eax]
    movdqa    xmm3, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm2
    punpcklbw xmm2, xmm3
    punpckhbw xmm7, xmm3
    movdqa    xmm3, xmm7
    movdqa    xmm4, [eax]
    movdqa    xmm5, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm4
    punpcklbw xmm4, xmm5
    punpckhbw xmm7, xmm5
    movdqa    xmm5, xmm7
    movdqa    xmm6, [eax]
    movdqa    xmm7, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    [esp], xmm5  // backup xmm5
    neg       edi
    movdqa    xmm5, xmm6  // use xmm5 as temp register.
    punpcklbw xmm6, xmm7
    punpckhbw xmm5, xmm7
    movdqa    xmm7, xmm5
    lea       eax, [eax + 8 * edi + 16]
    neg       edi
    // Second round of bit swap.
    movdqa    xmm5, xmm0
    punpcklwd xmm0, xmm2
    punpckhwd xmm5, xmm2
    movdqa    xmm2, xmm5
    movdqa    xmm5, xmm1
    punpcklwd xmm1, xmm3
    punpckhwd xmm5, xmm3
    movdqa    xmm3, xmm5
    movdqa    xmm5, xmm4
    punpcklwd xmm4, xmm6
    punpckhwd xmm5, xmm6
    movdqa    xmm6, xmm5
    movdqa    xmm5, [esp]  // restore xmm5
    movdqa    [esp], xmm6  // backup xmm6
    movdqa    xmm6, xmm5  // use xmm6 as temp register.
    punpcklwd xmm5, xmm7
    punpckhwd xmm6, xmm7
    movdqa    xmm7, xmm6
    // Third round of bit swap.
    // Write to the destination pointer.
    movdqa    xmm6, xmm0
    punpckldq xmm0, xmm4
    punpckhdq xmm6, xmm4
    movdqa    xmm4, xmm6
    movdqa    xmm6, [esp]  // restore xmm6
    movlpd    qword ptr [edx], xmm0
    movhpd    qword ptr [ebx], xmm0
    movlpd    qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm4
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm2  // use xmm0 as the temp register.
    punpckldq xmm2, xmm6
    movlpd    qword ptr [edx], xmm2
    movhpd    qword ptr [ebx], xmm2
    punpckhdq xmm0, xmm6
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm1  // use xmm0 as the temp register.
    punpckldq xmm1, xmm5
    movlpd    qword ptr [edx], xmm1
    movhpd    qword ptr [ebx], xmm1
    punpckhdq xmm0, xmm5
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm3  // use xmm0 as the temp register.
    punpckldq xmm3, xmm7
    movlpd    qword ptr [edx], xmm3
    movhpd    qword ptr [ebx], xmm3
    punpckhdq xmm0, xmm7
    sub       ecx, 8
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    jg        convertloop

    mov       esp, [esp + 16]
    pop       ebp
    pop       edi
    pop       esi
    pop       ebx
    ret
  }
}
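// The UV variant reads 16 byte rows of interleaved UV (8 pixels). After the
// same three rounds of interleaving, each register holds a transposed U row
// in its low qword and the matching V row in its high qword, so the
// movlpd/movhpd stores split the two planes to dst_a and dst_b for free.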
#elif !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align  2                                 \n"
  "1:                                            \n"
    "movq       (%0),%%xmm0                      \n"
    "movq       (%0,%3),%%xmm1                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "punpcklbw  %%xmm1,%%xmm0                    \n"
    "movq       (%0),%%xmm2                      \n"
    "movdqa     %%xmm0,%%xmm1                    \n"
    "palignr    $0x8,%%xmm1,%%xmm1               \n"
    "movq       (%0,%3),%%xmm3                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "punpcklbw  %%xmm3,%%xmm2                    \n"
    "movdqa     %%xmm2,%%xmm3                    \n"
    "movq       (%0),%%xmm4                      \n"
    "palignr    $0x8,%%xmm3,%%xmm3               \n"
    "movq       (%0,%3),%%xmm5                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "punpcklbw  %%xmm5,%%xmm4                    \n"
    "movdqa     %%xmm4,%%xmm5                    \n"
    "movq       (%0),%%xmm6                      \n"
    "palignr    $0x8,%%xmm5,%%xmm5               \n"
    "movq       (%0,%3),%%xmm7                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "punpcklbw  %%xmm7,%%xmm6                    \n"
    "neg        %3                               \n"
    "movdqa     %%xmm6,%%xmm7                    \n"
    "lea        0x8(%0,%3,8),%0                  \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    "neg        %3                               \n"
    // Second round of bit swap.
    "punpcklwd  %%xmm2,%%xmm0                    \n"
    "punpcklwd  %%xmm3,%%xmm1                    \n"
    "movdqa     %%xmm0,%%xmm2                    \n"
    "movdqa     %%xmm1,%%xmm3                    \n"
    "palignr    $0x8,%%xmm2,%%xmm2               \n"
    "palignr    $0x8,%%xmm3,%%xmm3               \n"
    "punpcklwd  %%xmm6,%%xmm4                    \n"
    "punpcklwd  %%xmm7,%%xmm5                    \n"
    "movdqa     %%xmm4,%%xmm6                    \n"
    "movdqa     %%xmm5,%%xmm7                    \n"
    "palignr    $0x8,%%xmm6,%%xmm6               \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq  %%xmm4,%%xmm0                    \n"
    "movq       %%xmm0,(%1)                      \n"
    "movdqa     %%xmm0,%%xmm4                    \n"
    "palignr    $0x8,%%xmm4,%%xmm4               \n"
    "movq       %%xmm4,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm6,%%xmm2                    \n"
    "movdqa     %%xmm2,%%xmm6                    \n"
    "movq       %%xmm2,(%1)                      \n"
    "palignr    $0x8,%%xmm6,%%xmm6               \n"
    "punpckldq  %%xmm5,%%xmm1                    \n"
    "movq       %%xmm6,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "movdqa     %%xmm1,%%xmm5                    \n"
    "movq       %%xmm1,(%1)                      \n"
    "palignr    $0x8,%%xmm5,%%xmm5               \n"
    "movq       %%xmm5,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm7,%%xmm3                    \n"
    "movq       %%xmm3,(%1)                      \n"
    "movdqa     %%xmm3,%%xmm7                    \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    "sub        $0x8,%2                          \n"
    "movq       %%xmm7,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "jg         1b                               \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  : "r"((intptr_t)(src_stride)),  // %3
    "r"((intptr_t)(dst_stride))   // %4
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
  );
}
#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
#define HAS_TRANSPOSE_UVWX8_SSE2
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int w);
  asm (
    DECLARE_FUNCTION(TransposeUVWx8_SSE2)
    "push   %ebx                               \n"
    "push   %esi                               \n"
    "push   %edi                               \n"
    "push   %ebp                               \n"
    "mov    0x14(%esp),%eax                    \n"
    "mov    0x18(%esp),%edi                    \n"
    "mov    0x1c(%esp),%edx                    \n"
    "mov    0x20(%esp),%esi                    \n"
    "mov    0x24(%esp),%ebx                    \n"
    "mov    0x28(%esp),%ebp                    \n"
    "mov    %esp,%ecx                          \n"
    "sub    $0x14,%esp                         \n"
    "and    $0xfffffff0,%esp                   \n"
    "mov    %ecx,0x10(%esp)                    \n"
    "mov    0x2c(%ecx),%ecx                    \n"

"1:                                            \n"
    "movdqa (%eax),%xmm0                       \n"
    "movdqa (%eax,%edi,1),%xmm1                \n"
    "lea    (%eax,%edi,2),%eax                 \n"
    "movdqa %xmm0,%xmm7                        \n"
    "punpcklbw %xmm1,%xmm0                     \n"
    "punpckhbw %xmm1,%xmm7                     \n"
    "movdqa %xmm7,%xmm1                        \n"
    "movdqa (%eax),%xmm2                       \n"
    "movdqa (%eax,%edi,1),%xmm3                \n"
    "lea    (%eax,%edi,2),%eax                 \n"
    "movdqa %xmm2,%xmm7                        \n"
    "punpcklbw %xmm3,%xmm2                     \n"
    "punpckhbw %xmm3,%xmm7                     \n"
    "movdqa %xmm7,%xmm3                        \n"
    "movdqa (%eax),%xmm4                       \n"
    "movdqa (%eax,%edi,1),%xmm5                \n"
    "lea    (%eax,%edi,2),%eax                 \n"
    "movdqa %xmm4,%xmm7                        \n"
    "punpcklbw %xmm5,%xmm4                     \n"
    "punpckhbw %xmm5,%xmm7                     \n"
    "movdqa %xmm7,%xmm5                        \n"
    "movdqa (%eax),%xmm6                       \n"
    "movdqa (%eax,%edi,1),%xmm7                \n"
    "lea    (%eax,%edi,2),%eax                 \n"
    "movdqa %xmm5,(%esp)                       \n"
    "neg    %edi                               \n"
    "movdqa %xmm6,%xmm5                        \n"
    "punpcklbw %xmm7,%xmm6                     \n"
    "punpckhbw %xmm7,%xmm5                     \n"
    "movdqa %xmm5,%xmm7                        \n"
    "lea    0x10(%eax,%edi,8),%eax             \n"
    "neg    %edi                               \n"
    "movdqa %xmm0,%xmm5                        \n"
    "punpcklwd %xmm2,%xmm0                     \n"
    "punpckhwd %xmm2,%xmm5                     \n"
    "movdqa %xmm5,%xmm2                        \n"
    "movdqa %xmm1,%xmm5                        \n"
    "punpcklwd %xmm3,%xmm1                     \n"
    "punpckhwd %xmm3,%xmm5                     \n"
    "movdqa %xmm5,%xmm3                        \n"
    "movdqa %xmm4,%xmm5                        \n"
    "punpcklwd %xmm6,%xmm4                     \n"
    "punpckhwd %xmm6,%xmm5                     \n"
    "movdqa %xmm5,%xmm6                        \n"
    "movdqa (%esp),%xmm5                       \n"
    "movdqa %xmm6,(%esp)                       \n"
    "movdqa %xmm5,%xmm6                        \n"
    "punpcklwd %xmm7,%xmm5                     \n"
    "punpckhwd %xmm7,%xmm6                     \n"
    "movdqa %xmm6,%xmm7                        \n"
    "movdqa %xmm0,%xmm6                        \n"
    "punpckldq %xmm4,%xmm0                     \n"
    "punpckhdq %xmm4,%xmm6                     \n"
    "movdqa %xmm6,%xmm4                        \n"
    "movdqa (%esp),%xmm6                       \n"
    "movlpd %xmm0,(%edx)                       \n"
    "movhpd %xmm0,(%ebx)                       \n"
    "movlpd %xmm4,(%edx,%esi,1)                \n"
    "lea    (%edx,%esi,2),%edx                 \n"
    "movhpd %xmm4,(%ebx,%ebp,1)                \n"
    "lea    (%ebx,%ebp,2),%ebx                 \n"
    "movdqa %xmm2,%xmm0                        \n"
    "punpckldq %xmm6,%xmm2                     \n"
    "movlpd %xmm2,(%edx)                       \n"
    "movhpd %xmm2,(%ebx)                       \n"
    "punpckhdq %xmm6,%xmm0                     \n"
    "movlpd %xmm0,(%edx,%esi,1)                \n"
    "lea    (%edx,%esi,2),%edx                 \n"
    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
    "lea    (%ebx,%ebp,2),%ebx                 \n"
    "movdqa %xmm1,%xmm0                        \n"
    "punpckldq %xmm5,%xmm1                     \n"
    "movlpd %xmm1,(%edx)                       \n"
    "movhpd %xmm1,(%ebx)                       \n"
    "punpckhdq %xmm5,%xmm0                     \n"
    "movlpd %xmm0,(%edx,%esi,1)                \n"
    "lea    (%edx,%esi,2),%edx                 \n"
    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
    "lea    (%ebx,%ebp,2),%ebx                 \n"
    "movdqa %xmm3,%xmm0                        \n"
    "punpckldq %xmm7,%xmm3                     \n"
    "movlpd %xmm3,(%edx)                       \n"
    "movhpd %xmm3,(%ebx)                       \n"
    "punpckhdq %xmm7,%xmm0                     \n"
    "sub    $0x8,%ecx                          \n"
    "movlpd %xmm0,(%edx,%esi,1)                \n"
    "lea    (%edx,%esi,2),%edx                 \n"
    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
    "lea    (%ebx,%ebp,2),%ebx                 \n"
    "jg     1b                                 \n"
    "mov    0x10(%esp),%esp                    \n"
    "pop    %ebp                               \n"
    "pop    %edi                               \n"
    "pop    %esi                               \n"
    "pop    %ebx                               \n"
#if defined(__native_client__)
    "pop    %ecx                               \n"
    "and    $0xffffffe0,%ecx                   \n"
    "jmp    *%ecx                              \n"
#else
    "ret                                       \n"
#endif
);
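// The 32 bit gcc version above is a file-scope asm block rather than inline
// asm because the routine needs more registers than 32 bit inline-asm
// constraints can comfortably provide. Like the Visual C version, it reads
// its arguments directly off the stack and aligns esp so the spill slot can
// be accessed with movdqa.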
#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
    defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3
static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
                                    uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align  2                                 \n"
  "1:                                            \n"
    "movdqa     (%0),%%xmm0                      \n"
    "movdqa     (%0,%3),%%xmm1                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "movdqa     %%xmm0,%%xmm8                    \n"
    "punpcklbw  %%xmm1,%%xmm0                    \n"
    "punpckhbw  %%xmm1,%%xmm8                    \n"
    "movdqa     (%0),%%xmm2                      \n"
    "movdqa     %%xmm0,%%xmm1                    \n"
    "movdqa     %%xmm8,%%xmm9                    \n"
    "palignr    $0x8,%%xmm1,%%xmm1               \n"
    "palignr    $0x8,%%xmm9,%%xmm9               \n"
    "movdqa     (%0,%3),%%xmm3                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "movdqa     %%xmm2,%%xmm10                   \n"
    "punpcklbw  %%xmm3,%%xmm2                    \n"
    "punpckhbw  %%xmm3,%%xmm10                   \n"
    "movdqa     %%xmm2,%%xmm3                    \n"
    "movdqa     %%xmm10,%%xmm11                  \n"
    "movdqa     (%0),%%xmm4                      \n"
    "palignr    $0x8,%%xmm3,%%xmm3               \n"
    "palignr    $0x8,%%xmm11,%%xmm11             \n"
    "movdqa     (%0,%3),%%xmm5                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "movdqa     %%xmm4,%%xmm12                   \n"
    "punpcklbw  %%xmm5,%%xmm4                    \n"
    "punpckhbw  %%xmm5,%%xmm12                   \n"
    "movdqa     %%xmm4,%%xmm5                    \n"
    "movdqa     %%xmm12,%%xmm13                  \n"
    "movdqa     (%0),%%xmm6                      \n"
    "palignr    $0x8,%%xmm5,%%xmm5               \n"
    "palignr    $0x8,%%xmm13,%%xmm13             \n"
    "movdqa     (%0,%3),%%xmm7                   \n"
    "lea        (%0,%3,2),%0                     \n"
    "movdqa     %%xmm6,%%xmm14                   \n"
    "punpcklbw  %%xmm7,%%xmm6                    \n"
    "punpckhbw  %%xmm7,%%xmm14                   \n"
    "neg        %3                               \n"
    "movdqa     %%xmm6,%%xmm7                    \n"
    "movdqa     %%xmm14,%%xmm15                  \n"
    "lea        0x10(%0,%3,8),%0                 \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    "palignr    $0x8,%%xmm15,%%xmm15             \n"
    "neg        %3                               \n"
    // Second round of bit swap.
    "punpcklwd  %%xmm2,%%xmm0                    \n"
    "punpcklwd  %%xmm3,%%xmm1                    \n"
    "movdqa     %%xmm0,%%xmm2                    \n"
    "movdqa     %%xmm1,%%xmm3                    \n"
    "palignr    $0x8,%%xmm2,%%xmm2               \n"
    "palignr    $0x8,%%xmm3,%%xmm3               \n"
    "punpcklwd  %%xmm6,%%xmm4                    \n"
    "punpcklwd  %%xmm7,%%xmm5                    \n"
    "movdqa     %%xmm4,%%xmm6                    \n"
    "movdqa     %%xmm5,%%xmm7                    \n"
    "palignr    $0x8,%%xmm6,%%xmm6               \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    "punpcklwd  %%xmm10,%%xmm8                   \n"
    "punpcklwd  %%xmm11,%%xmm9                   \n"
    "movdqa     %%xmm8,%%xmm10                   \n"
    "movdqa     %%xmm9,%%xmm11                   \n"
    "palignr    $0x8,%%xmm10,%%xmm10             \n"
    "palignr    $0x8,%%xmm11,%%xmm11             \n"
    "punpcklwd  %%xmm14,%%xmm12                  \n"
    "punpcklwd  %%xmm15,%%xmm13                  \n"
    "movdqa     %%xmm12,%%xmm14                  \n"
    "movdqa     %%xmm13,%%xmm15                  \n"
    "palignr    $0x8,%%xmm14,%%xmm14             \n"
    "palignr    $0x8,%%xmm15,%%xmm15             \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq  %%xmm4,%%xmm0                    \n"
    "movq       %%xmm0,(%1)                      \n"
    "movdqa     %%xmm0,%%xmm4                    \n"
    "palignr    $0x8,%%xmm4,%%xmm4               \n"
    "movq       %%xmm4,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm6,%%xmm2                    \n"
    "movdqa     %%xmm2,%%xmm6                    \n"
    "movq       %%xmm2,(%1)                      \n"
    "palignr    $0x8,%%xmm6,%%xmm6               \n"
    "punpckldq  %%xmm5,%%xmm1                    \n"
    "movq       %%xmm6,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "movdqa     %%xmm1,%%xmm5                    \n"
    "movq       %%xmm1,(%1)                      \n"
    "palignr    $0x8,%%xmm5,%%xmm5               \n"
    "movq       %%xmm5,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm7,%%xmm3                    \n"
    "movq       %%xmm3,(%1)                      \n"
    "movdqa     %%xmm3,%%xmm7                    \n"
    "palignr    $0x8,%%xmm7,%%xmm7               \n"
    "movq       %%xmm7,(%1,%4)                   \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm12,%%xmm8                   \n"
    "movq       %%xmm8,(%1)                      \n"
    "movdqa     %%xmm8,%%xmm12                   \n"
    "palignr    $0x8,%%xmm12,%%xmm12             \n"
    "movq       %%xmm12,(%1,%4)                  \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm14,%%xmm10                  \n"
    "movdqa     %%xmm10,%%xmm14                  \n"
    "movq       %%xmm10,(%1)                     \n"
    "palignr    $0x8,%%xmm14,%%xmm14             \n"
    "punpckldq  %%xmm13,%%xmm9                   \n"
    "movq       %%xmm14,(%1,%4)                  \n"
    "lea        (%1,%4,2),%1                     \n"
    "movdqa     %%xmm9,%%xmm13                   \n"
    "movq       %%xmm9,(%1)                      \n"
    "palignr    $0x8,%%xmm13,%%xmm13             \n"
    "movq       %%xmm13,(%1,%4)                  \n"
    "lea        (%1,%4,2),%1                     \n"
    "punpckldq  %%xmm15,%%xmm11                  \n"
    "movq       %%xmm11,(%1)                     \n"
    "movdqa     %%xmm11,%%xmm15                  \n"
    "palignr    $0x8,%%xmm15,%%xmm15             \n"
    "sub        $0x10,%2                         \n"
    "movq       %%xmm15,(%1,%4)                  \n"
    "lea        (%1,%4,2),%1                     \n"
    "jg         1b                               \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  : "r"((intptr_t)(src_stride)),  // %3
    "r"((intptr_t)(dst_stride))   // %4
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
  );
}
#define HAS_TRANSPOSE_UVWX8_SSE2
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align  2                                 \n"
  "1:                                            \n"
    "movdqa     (%0),%%xmm0                      \n"
    "movdqa     (%0,%4),%%xmm1                   \n"
    "lea        (%0,%4,2),%0                     \n"
    "movdqa     %%xmm0,%%xmm8                    \n"
    "punpcklbw  %%xmm1,%%xmm0                    \n"
    "punpckhbw  %%xmm1,%%xmm8                    \n"
    "movdqa     %%xmm8,%%xmm1                    \n"
    "movdqa     (%0),%%xmm2                      \n"
    "movdqa     (%0,%4),%%xmm3                   \n"
    "lea        (%0,%4,2),%0                     \n"
    "movdqa     %%xmm2,%%xmm8                    \n"
    "punpcklbw  %%xmm3,%%xmm2                    \n"
    "punpckhbw  %%xmm3,%%xmm8                    \n"
    "movdqa     %%xmm8,%%xmm3                    \n"
    "movdqa     (%0),%%xmm4                      \n"
    "movdqa     (%0,%4),%%xmm5                   \n"
    "lea        (%0,%4,2),%0                     \n"
    "movdqa     %%xmm4,%%xmm8                    \n"
    "punpcklbw  %%xmm5,%%xmm4                    \n"
    "punpckhbw  %%xmm5,%%xmm8                    \n"
    "movdqa     %%xmm8,%%xmm5                    \n"
    "movdqa     (%0),%%xmm6                      \n"
    "movdqa     (%0,%4),%%xmm7                   \n"
    "lea        (%0,%4,2),%0                     \n"
    "movdqa     %%xmm6,%%xmm8                    \n"
    "punpcklbw  %%xmm7,%%xmm6                    \n"
    "neg        %4                               \n"
    "lea        0x10(%0,%4,8),%0                 \n"
    "punpckhbw  %%xmm7,%%xmm8                    \n"
    "movdqa     %%xmm8,%%xmm7                    \n"
    "neg        %4                               \n"
    // Second round of bit swap.
    "movdqa     %%xmm0,%%xmm8                    \n"
    "movdqa     %%xmm1,%%xmm9                    \n"
    "punpckhwd  %%xmm2,%%xmm8                    \n"
    "punpckhwd  %%xmm3,%%xmm9                    \n"
    "punpcklwd  %%xmm2,%%xmm0                    \n"
    "punpcklwd  %%xmm3,%%xmm1                    \n"
    "movdqa     %%xmm8,%%xmm2                    \n"
    "movdqa     %%xmm9,%%xmm3                    \n"
    "movdqa     %%xmm4,%%xmm8                    \n"
    "movdqa     %%xmm5,%%xmm9                    \n"
    "punpckhwd  %%xmm6,%%xmm8                    \n"
    "punpckhwd  %%xmm7,%%xmm9                    \n"
    "punpcklwd  %%xmm6,%%xmm4                    \n"
    "punpcklwd  %%xmm7,%%xmm5                    \n"
    "movdqa     %%xmm8,%%xmm6                    \n"
    "movdqa     %%xmm9,%%xmm7                    \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "movdqa     %%xmm0,%%xmm8                    \n"
    "punpckldq  %%xmm4,%%xmm0                    \n"
    "movlpd     %%xmm0,(%1)                      \n"  // Write back U channel
    "movhpd     %%xmm0,(%2)                      \n"  // Write back V channel
    "punpckhdq  %%xmm4,%%xmm8                    \n"
    "movlpd     %%xmm8,(%1,%5)                   \n"
    "lea        (%1,%5,2),%1                     \n"
    "movhpd     %%xmm8,(%2,%6)                   \n"
    "lea        (%2,%6,2),%2                     \n"
    "movdqa     %%xmm2,%%xmm8                    \n"
    "punpckldq  %%xmm6,%%xmm2                    \n"
    "movlpd     %%xmm2,(%1)                      \n"
    "movhpd     %%xmm2,(%2)                      \n"
    "punpckhdq  %%xmm6,%%xmm8                    \n"
    "movlpd     %%xmm8,(%1,%5)                   \n"
    "lea        (%1,%5,2),%1                     \n"
    "movhpd     %%xmm8,(%2,%6)                   \n"
    "lea        (%2,%6,2),%2                     \n"
    "movdqa     %%xmm1,%%xmm8                    \n"
    "punpckldq  %%xmm5,%%xmm1                    \n"
    "movlpd     %%xmm1,(%1)                      \n"
    "movhpd     %%xmm1,(%2)                      \n"
    "punpckhdq  %%xmm5,%%xmm8                    \n"
    "movlpd     %%xmm8,(%1,%5)                   \n"
    "lea        (%1,%5,2),%1                     \n"
    "movhpd     %%xmm8,(%2,%6)                   \n"
    "lea        (%2,%6,2),%2                     \n"
    "movdqa     %%xmm3,%%xmm8                    \n"
    "punpckldq  %%xmm7,%%xmm3                    \n"
    "movlpd     %%xmm3,(%1)                      \n"
    "movhpd     %%xmm3,(%2)                      \n"
    "punpckhdq  %%xmm7,%%xmm8                    \n"
    "sub        $0x8,%3                          \n"
    "movlpd     %%xmm8,(%1,%5)                   \n"
    "lea        (%1,%5,2),%1                     \n"
    "movhpd     %%xmm8,(%2,%6)                   \n"
    "lea        (%2,%6,2),%2                     \n"
    "jg         1b                               \n"
  : "+r"(src),    // %0
    "+r"(dst_a),  // %1
    "+r"(dst_b),  // %2
    "+r"(w)       // %3
  : "r"((intptr_t)(src_stride)),    // %4
    "r"((intptr_t)(dst_stride_a)),  // %5
    "r"((intptr_t)(dst_stride_b))   // %6
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    "xmm8", "xmm9"
  );
}
#endif
#endif
static void TransposeWx8_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}
static void TransposeWxH_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width, int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}
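// Illustrative example: with width 2 and height 3, the assignment above maps
// the source byte at row j = 2, column i = 0 to destination row 0, column 2,
// i.e. a plain transpose.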
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  int i = height;
  void (*TransposeWx8)(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride,
                       int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSE_WX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeWx8 = TransposeWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    TransposeWx8 = TransposeWx8_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    TransposeWx8 = TransposeWx8_FAST_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    if (IS_ALIGNED(width, 4) &&
        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
      TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
    } else {
      TransposeWx8 = TransposeWx8_MIPS_DSPR2;
    }
  }
#endif

  // Work across the source in 8x8 tiles.
  while (i >= 8) {
    TransposeWx8(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;    // Go down 8 rows.
    dst += 8;                 // Move over 8 columns.
    i -= 8;
  }

  TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
}
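// Note the destination plane of a transpose must be at least 'height' bytes
// wide: each band of 8 source rows becomes 8 destination columns, and the
// final height % 8 rows are finished with the scalar WxH version.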
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
                   uint8* dst, int dst_stride,
                   int width, int height) {
  // Rotate by 90 is a transpose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
  src += src_stride * (height - 1);
  src_stride = -src_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Rotate by 270 is a transpose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
  dst += dst_stride * (width - 1);
  dst_stride = -dst_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
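// Illustrative example: for a 2x2 plane with rows "ab" and "cd", RotatePlane90
// produces rows "ca" and "db", while RotatePlane270 produces "bd" and "ac".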
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Swap first and last row and mirror the content. Uses a temporary row.
  align_buffer_64(row, width);
  const uint8* src_bot = src + src_stride * (height - 1);
  uint8* dst_bot = dst + dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    MirrorRow = MirrorRow_SSE2;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    MirrorRow = MirrorRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    MirrorRow = MirrorRow_AVX2;
  }
#endif
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
    MirrorRow = MirrorRow_MIPS_DSPR2;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    CopyRow = CopyRow_SSE2;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Odd height will harmlessly mirror the middle row twice.
  for (y = 0; y < half_height; ++y) {
    MirrorRow(src, row, width);  // Mirror first row into a buffer
    src += src_stride;
    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
    dst += dst_stride;
    CopyRow(row, dst_bot, width);  // Copy first mirrored row into last
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
  free_aligned_buffer_64(row);
}
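// Because each pair of rows is staged through the temporary buffer, this
// routine also works when dst is the same plane as src (in-place 180
// rotation).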
static void TransposeUVWx8_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}
static void TransposeUVWxH_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width, int height) {
  int i;
  for (i = 0; i < width * 2; i += 2) {
    int j;
    for (j = 0; j < height; ++j) {
      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
    }
  }
}
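// Illustrative example: source pixel column i (bytes 2 * i and 2 * i + 1 of
// each interleaved row) becomes row i of dst_a (U bytes) and row i of dst_b
// (V bytes).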
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i = height;
  void (*TransposeUVWx8)(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSE_UVWX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeUVWx8 = TransposeUVWx8_NEON;
  }
#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    TransposeUVWx8 = TransposeUVWx8_SSE2;
  }
#elif defined(HAS_TRANSPOSE_UVWX8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
  }
#endif

  // Work through the source in 8x8 tiles.
  while (i >= 8) {
    TransposeUVWx8(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width);
    src += 8 * src_stride;    // Go down 8 rows.
    dst_a += 8;               // Move over 8 columns.
    dst_b += 8;               // Move over 8 columns.
    i -= 8;
  }

  TransposeUVWxH_C(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width, i);
}
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
                uint8* dst_a, int dst_stride_a,
                uint8* dst_b, int dst_stride_b,
                int width, int height) {
  src += src_stride * (height - 1);
  src_stride = -src_stride;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  dst_a += dst_stride_a * (width - 1);
  dst_b += dst_stride_b * (width - 1);
  dst_stride_a = -dst_stride_a;
  dst_stride_b = -dst_stride_b;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}
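// RotateUV90/270 above rotate and deinterleave in a single pass: TransposeUV
// splits the U and V bytes of each source pixel into the two destination
// planes. Rotation by 180 below needs no transpose at all.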
// Rotate 180 is a horizontal and vertical flip.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i;
  void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
      MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    MirrorRowUV = MirrorUVRow_NEON;
  }
#elif defined(HAS_MIRRORROW_UV_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    MirrorRowUV = MirrorUVRow_SSSE3;
  }
#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
  }
#endif

  dst_a += dst_stride_a * (height - 1);
  dst_b += dst_stride_b * (height - 1);

  for (i = 0; i < height; ++i) {
    MirrorRowUV(src, dst_a, dst_b, width);
    src += src_stride;
    dst_a -= dst_stride_a;
    dst_b -= dst_stride_b;
  }
}
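// Unlike RotatePlane180, the loop above visits every row rather than swapping
// pairs in place, which assumes the interleaved source and the planar
// destinations do not overlap.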
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
                uint8* dst, int dst_stride,
                int width, int height,
                enum RotationMode mode) {
  if (!src || width <= 0 || height == 0 || !dst) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      CopyPlane(src, src_stride,
                dst, dst_stride,
                width, height);
      return 0;
    case kRotate90:
      RotatePlane90(src, src_stride,
                    dst, dst_stride,
                    width, height);
      return 0;
    case kRotate270:
      RotatePlane270(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    case kRotate180:
      RotatePlane180(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    default:
      break;
  }
  return -1;
}
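// Example with illustrative values: a quarter turn of a 640x480 plane needs a
// 480x640 destination, so dst_stride is the source height:
//   RotatePlane(src, 640, dst, 480, 640, 480, kRotate90);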
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return I420Copy(src_y, src_stride_y,
                      src_u, src_stride_u,
                      src_v, src_stride_v,
                      dst_y, dst_stride_y,
                      dst_u, dst_stride_u,
                      dst_v, dst_stride_v,
                      width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotatePlane90(src_u, src_stride_u,
                    dst_u, dst_stride_u,
                    halfwidth, halfheight);
      RotatePlane90(src_v, src_stride_v,
                    dst_v, dst_stride_v,
                    halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane270(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane270(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane180(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane180(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
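// Example with illustrative values: rotating 640x480 I420 by 90 degrees into
// a 480x640 frame; chroma planes use half-width strides:
//   I420Rotate(src_y, 640, src_u, 320, src_v, 320,
//              dst_y, 480, dst_u, 240, dst_v, 240,
//              640, 480, kRotate90);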
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
                     const uint8* src_uv, int src_stride_uv,
                     uint8* dst_y, int dst_stride_y,
                     uint8* dst_u, int dst_stride_u,
                     uint8* dst_v, int dst_stride_v,
                     int width, int height,
                     enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_uv || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return NV12ToI420(src_y, src_stride_y,
                        src_uv, src_stride_uv,
                        dst_y, dst_stride_y,
                        dst_u, dst_stride_u,
                        dst_v, dst_stride_v,
                        width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotateUV90(src_uv, src_stride_uv,
                 dst_u, dst_stride_u,
                 dst_v, dst_stride_v,
                 halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV270(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV180(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif