#include "evas_common.h"
#include "evas_convert_yuv.h"

#if defined BUILD_MMX || defined BUILD_SSE

#if defined HAVE_ALTIVEC_H

#ifdef BUILD_CONVERT_YUV

static void _evas_yuv_init         (void);
static void _evas_yv12torgb_sse    (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_mmx    (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_altivec(unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_diz    (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_raster (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yuy2torgb_raster (unsigned char **yuv, unsigned char *rgb, int w, int h);
/* fixed point calculation resolution, in bits */
/* i.e. RES = 6 is 10.6 fixed point */
/*      RES = 8 is 8.8 fixed point */
/*      RES = 4 is 12.4 fixed point */
/* NB: going above 6 will lead to overflow... :( */

#define RZ(i)   (i >> (BITRES - RES))
#define FOUR(i) {i, i, i, i}
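/* A quick worked example of the scaling above, assuming the full precision
 * constants are 16.16 (BITRES of 16, as the "16.16 in the C code" comment
 * further below implies): the 1.596 V-to-red coefficient stored as 16.16 is
 * about 1.596 * 65536 = 104595; RZ() shifts it right by (16 - 6) bits,
 * leaving roughly 1.596 * 64 = 102 in 10.6 form, and FOUR() then replicates
 * that 16 bit value across the four lanes of a 64 bit MMX constant. */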
#if defined BUILD_MMX || defined BUILD_SSE
__attribute__ ((aligned (8))) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_ymul  [4] = FOUR(RZ(YMUL));
__attribute__ ((aligned (8))) const volatile unsigned short _const_128   [4] = FOUR(128);
__attribute__ ((aligned (8))) const volatile unsigned short _const_32    [4] = FOUR(RZ(OFF));
__attribute__ ((aligned (8))) const volatile unsigned short _const_16    [4] = FOUR(16);
__attribute__ ((aligned (8))) const volatile unsigned short _const_ff    [4] = FOUR(-1);
#define CONST_CRVCRV *_const_crvcrv
#define CONST_CBUCBU *_const_cbucbu
#define CONST_CGUCGU *_const_cgucgu
#define CONST_CGVCGV *_const_cgvcgv
#define CONST_YMUL   *_const_ymul
#define CONST_128    *_const_128
#define CONST_32     *_const_32
#define CONST_16     *_const_16
#define CONST_FF     *_const_ff
/* for C non aligned cleanup */
const int _crv = RZ(CRV);   /* 1.596 */
const int _cbu = RZ(CBU);   /* 2.018 */
const int _cgu = RZ(CGU);   /* 0.391 */
const int _cgv = RZ(CGV);   /* 0.813 */
const vector unsigned short res = AVV(RES);
const vector signed short crv = AVV(RZ(CRV));
const vector signed short cbu = AVV(RZ(CBU));
const vector signed short cgu = AVV(RZ(CGU));
const vector signed short cgv = AVV(RZ(CGV));
const vector signed short ymul = AVV(RZ(YMUL));
const vector signed short c128 = AVV(128);
const vector signed short c32 = AVV(RZ(OFF));
const vector signed short c16 = AVV(16);
const vector unsigned char zero = AVV(0);
const vector signed short maxchar = AVV(255);

const vector unsigned char pickrg1 = AVV(0, 0x1, 0x11, 0,

const vector unsigned char pickrg2 = AVV(0, 0x9, 0x19, 0,

const vector unsigned char pickrgb1 = AVV(0x3, 0x1, 0x2, 0x11,
                                          0xf, 0xd, 0xe, 0x17);
const vector unsigned char pickrgb2 = AVV(0x3, 0x1, 0x2, 0x19,
                                          0xf, 0xd, 0xe, 0x1f);
/* shortcut speedup lookup-tables */
static short _v1164[256];
static short _v1596[256];
static short _v813[256];
static short _v391[256];
static short _v2018[256];

static unsigned char _clip_lut[1024];
#define LUT_CLIP(i) ((_clip_lut+384)[(i)])

#define CMP_CLIP(i) ((i&256)? (~(i>>10)) : i);
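/* The clip table trades a branch per channel for a table load: _clip_lut
 * covers inputs from -384 to 639 (hence the +384 bias in LUT_CLIP()), which
 * gives enough headroom for the intermediate values the converters below
 * produce. A minimal scalar equivalent, illustrative only and not part of
 * the build: */
#if 0
static unsigned char
_example_clip(int i)
{
   /* same result as LUT_CLIP(i) for i within the table's -384..639 range */
   if (i < 0) return 0;
   if (i > 255) return 255;
   return (unsigned char)i;
}
#endif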
static int initted = 0;
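/* Note on the expected input layout (inferred from the row indexing in the
 * converters below): src/yuv is an array of per-row pointers rather than a
 * single planar buffer. Entries 0..h-1 point at the Y rows, entries
 * h..h+(h/2)-1 at the U rows and entries from h+(h/2) onwards at the V rows,
 * with U and V subsampled 2x2 as usual for YV12/I420. A hypothetical helper
 * showing how a caller might build such an array from one contiguous,
 * I420-ordered buffer (illustrative only, not part of the build): */
#if 0
static void
_example_build_row_pointers(unsigned char *buf, int w, int h, unsigned char **rows)
{
   int i;

   for (i = 0; i < h; i++)
     rows[i] = buf + (i * w);                                  /* Y rows */
   for (i = 0; i < (h / 2); i++)
     rows[h + i] = buf + (w * h) + (i * (w / 2));              /* U rows */
   for (i = 0; i < (h / 2); i++)
     rows[h + (h / 2) + i] = buf + (w * h) + ((w / 2) * (h / 2))
                             + (i * (w / 2));                  /* V rows */
}
#endif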
evas_common_convert_yuv_420p_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)

#if defined BUILD_MMX || defined BUILD_SSE
evas_common_cpu_can_do(&mmx, &sse, &sse2);

if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
_evas_yv12torgb_sse(src, dst, w, h);
else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
_evas_yv12torgb_mmx(src, dst, w, h);

if (evas_common_cpu_has_feature(CPU_FEATURE_ALTIVEC))
_evas_yv12torgb_altivec(src, dst, w, h);

if (!initted) _evas_yuv_init();

/* FIXME: diz may be faster sometimes */
_evas_yv12torgb_raster(src, dst, w, h);
/* Thanks to Diz for this code. I've munged it a little and turned it into */
/* inline macros. I tried beating it with a different algorithm using MMX */
/* but failed. So here we are. This is the fastest YUV->RGB I know of for */
/* x86. It has an issue in that it doesn't convert colours accurately, so */
/* the image looks a little "yellowy". This is a result of only 10.6 fixed */
/* point resolution as opposed to 16.16 in the C code. This could be fixed */
/* by processing half the number of pixels per cycle and going up to 32 bits */
/* per element during compute, but it would all but negate the speedup */
/* from MMX, I think :( It might be possible to use SSE and SSE2 here, but */
/* I haven't tried yet. Let's see. */

/* NB: XviD has almost the same code in its assembly YV12->RGB code: same */
/* algorithm, same constants, same all over actually, except it does a few */
/* extra memory accesses that this one doesn't, so in theory this code */
/* should be faster. In the end it's all just an MMX version of the */
/* reference implementation done with fixed point math. */
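/* For reference, each pixel below goes through the same reduced precision
 * (RES bit) fixed point conversion that the scalar cleanup code uses; a
 * minimal sketch of that per-pixel math, illustrative only and not part of
 * the build: */
#if 0
static void
_example_yuv_to_rgb(int Y, int U, int V, int *r, int *g, int *b)
{
   int y, u, v;

   y = RZ(YMUL) * (Y - 16);
   u = U - 128;
   v = V - 128;
   *r = LUT_CLIP((y + (_crv * v)) >> RES);
   *g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
   *b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
}
#endif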
_evas_yv12torgb_sse(unsigned char **yuv, unsigned char *rgb, int w, int h)

register unsigned char *yp1, *up, *vp;

/* destination pointers */

for (yy = 0; yy < h; yy++)

up = yuv[h + (yy / 2)];
vp = yuv[h + (h / 2) + (yy / 2)];
for (xx = 0; xx < (w - 7); xx += 8)

/* widen the u (mm2) and v (mm3) bytes to 16 bit words (mm7 holds zero here) */
punpcklbw_r2r(mm7, mm2);
punpcklbw_r2r(mm7, mm3);

/* y -= 16 (both groups of 4 y values, in mm0/mm1) */
movq_m2r(CONST_16, mm4);
psubsw_r2r(mm4, mm0);
psubsw_r2r(mm4, mm1);

/* u -= 128, v -= 128 */
movq_m2r(CONST_128, mm5);
psubsw_r2r(mm5, mm2);
psubsw_r2r(mm5, mm3);

/* scale y by YMUL */
movq_m2r(CONST_YMUL, mm4);
pmullw_r2r(mm4, mm0);
pmullw_r2r(mm4, mm1);

/* per-lane chroma products: mm7 = crv*v, mm6 = cbu*u, mm5 = cgu*u, mm4 = cgv*v */
movq_m2r(CONST_CRVCRV, mm7);
pmullw_r2r(mm3, mm7);
movq_m2r(CONST_CBUCBU, mm6);
pmullw_r2r(mm2, mm6);
movq_m2r(CONST_CGUCGU, mm5);
pmullw_r2r(mm2, mm5);
movq_m2r(CONST_CGVCGV, mm4);
pmullw_r2r(mm3, mm4);

paddsw_r2r(mm7, mm2);
paddsw_r2r(mm1, mm7);

packuswb_r2r(mm7, mm2);

punpckhbw_r2r(mm7, mm2);
punpcklbw_r2r(mm3, mm7);

psubsw_r2r(mm5, mm3);
psubsw_r2r(mm4, mm3);
paddsw_m2r(CONST_32, mm3);

psubsw_r2r(mm5, mm7);
psubsw_r2r(mm4, mm7);
paddsw_m2r(CONST_32, mm7);

packuswb_r2r(mm7, mm3);

punpckhbw_r2r(mm7, mm3);
punpcklbw_r2r(mm4, mm7);

movq_m2r(CONST_32, mm4);
paddsw_r2r(mm6, mm0);
paddsw_r2r(mm6, mm1);
paddsw_r2r(mm4, mm0);
paddsw_r2r(mm4, mm1);

packuswb_r2r(mm1, mm0);

punpckhbw_r2r(mm7, mm0);
punpcklbw_r2r(mm5, mm7);

movq_m2r(CONST_FF, mm1);

punpckhbw_r2r(mm3, mm2);
punpcklbw_r2r(mm6, mm7);
punpckhbw_r2r(mm1, mm0);
punpcklbw_r2r(mm1, mm5);

punpckhwd_r2r(mm5, mm7);
punpcklwd_r2r(mm5, mm1);

punpckhwd_r2r(mm0, mm2);
punpcklwd_r2r(mm0, mm4);

/* stream the 8 converted pixels (32 bytes) out with non-temporal stores */
movntq_r2m(mm1, *(dp1));
movntq_r2m(mm7, *(dp1 + 8));
movntq_r2m(mm4, *(dp1 + 16));
movntq_r2m(mm2, *(dp1 + 24));
/* cleanup pixels that aren't a multiple of 8 pixels wide */
int y, u, v, r, g, b;

for (; xx < w; xx += 2)

y = RZ(YMUL) * ((*yp1++) - 16);
r = LUT_CLIP((y + (_crv * v)) >> RES);
g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

y = RZ(YMUL) * ((*yp1++) - 16);
r = LUT_CLIP((y + (_crv * v)) >> RES);
g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

_evas_yv12torgb_mmx(yuv, rgb, w, h);
/* Same conversion pipeline as the SSE variant above; the visible difference */
/* is the plain movq stores at the end instead of the non-temporal movntq.   */
_evas_yv12torgb_mmx(unsigned char **yuv, unsigned char *rgb, int w, int h)

register unsigned char *yp1, *up, *vp;

/* destination pointers */

for (yy = 0; yy < h; yy++)

up = yuv[h + (yy / 2)];
vp = yuv[h + (h / 2) + (yy / 2)];
for (xx = 0; xx < (w - 7); xx += 8)

punpcklbw_r2r(mm7, mm2);
punpcklbw_r2r(mm7, mm3);

movq_m2r(CONST_16, mm4);
psubsw_r2r(mm4, mm0);
psubsw_r2r(mm4, mm1);

movq_m2r(CONST_128, mm5);
psubsw_r2r(mm5, mm2);
psubsw_r2r(mm5, mm3);

movq_m2r(CONST_YMUL, mm4);
pmullw_r2r(mm4, mm0);
pmullw_r2r(mm4, mm1);

movq_m2r(CONST_CRVCRV, mm7);
pmullw_r2r(mm3, mm7);
movq_m2r(CONST_CBUCBU, mm6);
pmullw_r2r(mm2, mm6);
movq_m2r(CONST_CGUCGU, mm5);
pmullw_r2r(mm2, mm5);
movq_m2r(CONST_CGVCGV, mm4);
pmullw_r2r(mm3, mm4);

paddsw_r2r(mm7, mm2);
paddsw_r2r(mm1, mm7);

packuswb_r2r(mm7, mm2);

punpckhbw_r2r(mm7, mm2);
punpcklbw_r2r(mm3, mm7);

psubsw_r2r(mm5, mm3);
psubsw_r2r(mm4, mm3);
paddsw_m2r(CONST_32, mm3);

psubsw_r2r(mm5, mm7);
psubsw_r2r(mm4, mm7);
paddsw_m2r(CONST_32, mm7);

packuswb_r2r(mm7, mm3);

punpckhbw_r2r(mm7, mm3);
punpcklbw_r2r(mm4, mm7);

movq_m2r(CONST_32, mm4);
paddsw_r2r(mm6, mm0);
paddsw_r2r(mm6, mm1);
paddsw_r2r(mm4, mm0);
paddsw_r2r(mm4, mm1);

packuswb_r2r(mm1, mm0);

punpckhbw_r2r(mm7, mm0);
punpcklbw_r2r(mm5, mm7);

movq_m2r(CONST_FF, mm1);

punpckhbw_r2r(mm3, mm2);
punpcklbw_r2r(mm6, mm7);
punpckhbw_r2r(mm1, mm0);
punpcklbw_r2r(mm1, mm5);

punpckhwd_r2r(mm5, mm7);
punpcklwd_r2r(mm5, mm1);

punpckhwd_r2r(mm0, mm2);
punpcklwd_r2r(mm0, mm4);

/* plain (cacheable) stores of the 8 converted pixels */
movq_r2m(mm1, *(dp1));
movq_r2m(mm7, *(dp1 + 8));
movq_r2m(mm4, *(dp1 + 16));
movq_r2m(mm2, *(dp1 + 24));
/* cleanup pixels that aren't a multiple of 8 pixels wide */
int y, u, v, r, g, b;

for (; xx < w; xx += 2)

y = RZ(YMUL) * ((*yp1++) - 16);
r = LUT_CLIP((y + (_crv * v)) >> RES);
g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

y = RZ(YMUL) * ((*yp1++) - 16);
r = LUT_CLIP((y + (_crv * v)) >> RES);
g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

_evas_yv12torgb_raster(yuv, rgb, w, h);
_evas_yv12torgb_altivec(unsigned char **yuv, unsigned char *rgb, int w, int h)

unsigned char *yp1, *yp2, *up, *vp;
unsigned char *dp1, *dp2;
vector signed short y, u, v;
vector signed short r, g, b;
vector signed short tmp1, tmp2, tmp3;
vector unsigned char yperm, uperm, vperm, rgb1, rgb2;
vector unsigned char alpha;

/* handy halved w & h */

/* destination pointers */

alpha = vec_mergeh((vector unsigned char)AVV(255), zero);
alpha = (vector unsigned char)vec_mergeh((vector unsigned short)alpha,
                                         (vector unsigned short)zero);
for (yy = 0; yy < h2; yy++)

for (xx = 0; xx < w2; xx += 4)

/* Load 4 y and 4 u & v pixels for the 8x2 pixel block. */
/* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);
/* 3 */ tmp1 = (vector signed short)vec_lde(0, (unsigned int *)up);
/* 3 */ tmp2 = (vector signed short)vec_lde(0, (unsigned int *)vp);

/* Prepare for aligning the data in their vectors */
/* 3 */ yperm = vec_lvsl(0, yp1);
/* 3 */ uperm = vec_lvsl(0, up);
/* 3 */ vperm = vec_lvsl(0, vp);

/* Save y and load the next 4 y pixels for a total of 8 */
/* 2 */ y = vec_perm(tmp3, tmp3, yperm);
/* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);

/* Setup and calculate the 4 u pixels */
/* 2 */ tmp1 = vec_perm(tmp1, tmp1, uperm);
/* 2 */ tmp2 = vec_perm(tmp2, tmp2, vperm);

/* Avoid dependency stalls on yperm and calculate the 4 u values */
/* 3 */ yperm = vec_lvsr(12, yp1);
/* 1 */ tmp1 = (vector signed short)vec_mergeh((vector unsigned char)tmp1,
                                               (vector unsigned char)tmp1);
/* 1 */ u = (vector signed short)vec_mergeh(zero,
                                            (vector unsigned char)tmp1);

/* 1 */ u = vec_sub(u, c128);
/* 2 */ tmp3 = vec_perm(tmp3, tmp3, yperm);

/* Setup and calculate the 4 v values */
/* 1 */ tmp2 = (vector signed short)vec_mergeh((vector unsigned char)tmp2,
                                               (vector unsigned char)tmp2);
/* 1 */ v = (vector signed short)vec_mergeh(zero,
                                            (vector unsigned char)tmp2);
/* 4 */ tmp2 = vec_mladd(cgu, u, (vector signed short)zero);
/* 1 */ v = vec_sub(v, c128);

/* Move the data into y and start loading the next 4 pixels */
/* 1 */ y = (vector signed short)vec_mergeh(zero,
                                            (vector unsigned char)y);
/* 1 */ tmp3 = (vector signed short)vec_mergeh(zero,
                                               (vector unsigned char)tmp3);
/* 1 */ y = vec_or(y, tmp3);

/* Finish calculating y */
/* 1 */ y = vec_sub(y, c16);
/* 4 */ y = vec_mladd(ymul, y, (vector signed short)zero);

/* Perform non-dependent multiplies first. */
/* 4 */ tmp1 = vec_mladd(crv, v, y);
/* 4 */ tmp2 = vec_mladd(cgv, v, tmp2);
/* 4 */ tmp3 = vec_mladd(cbu, u, y);

/* Calculate rgb values */
/* 1 */ r = vec_sra(tmp1, res);

/* 1 */ tmp2 = vec_sub(y, tmp2);
/* 1 */ tmp2 = vec_add(tmp2, c32);
/* 1 */ g = vec_sra(tmp2, res);

/* 1 */ tmp3 = vec_add(tmp3, c32);
/* 1 */ b = vec_sra(tmp3, res);

/* Bound to 0 <= x <= 255 */
/* 1 */ r = vec_min(r, maxchar);
/* 1 */ g = vec_min(g, maxchar);
/* 1 */ b = vec_min(b, maxchar);
/* 1 */ r = vec_max(r, (vector signed short)zero);
/* 1 */ g = vec_max(g, (vector signed short)zero);
/* 1 */ b = vec_max(b, (vector signed short)zero);

/* Combine r, g and b. */
/* 2 */ rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

/* 2 */ rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

/* 2 */ rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
/* 2 */ rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

/* 1 */ rgb1 = vec_or(alpha, rgb1);
/* 1 */ rgb2 = vec_or(alpha, rgb2);

/* 3 */ vec_stl(rgb1, 0, dp1);

/* 3 */ vec_stl(rgb2, 0, dp1);
/* Begin the second row calculations */

/* Load 4 y pixels for the 8x2 pixel block. */
/* 3 */ yperm = vec_lvsl(0, yp2);
/* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);

/* Save y and load the next 4 y pixels for a total of 8 */
/* 2 */ y = vec_perm(tmp3, tmp3, yperm);
/* 3 */ yperm = vec_lvsr(12, yp2);
/* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);
/* 1 */ y = (vector signed short)vec_mergeh(zero,
                                            (vector unsigned char)y);

/* Avoid dependency stalls on yperm */
/* 2 */ tmp3 = vec_perm(tmp3, tmp3, yperm);
/* 1 */ tmp3 = (vector signed short)vec_mergeh(zero,
                                               (vector unsigned char)tmp3);
/* 1 */ y = vec_or(y, tmp3);

/* Start the calculation for g */
/* 4 */ tmp2 = vec_mladd(cgu, u, (vector signed short)zero);

/* Finish calculating y */
/* 1 */ y = vec_sub(y, c16);
/* 4 */ y = vec_mladd(ymul, y, (vector signed short)zero);

/* Perform non-dependent multiplies first. */
/* 4 */ tmp2 = vec_mladd(cgv, v, tmp2);
/* 4 */ tmp1 = vec_mladd(crv, v, y);
/* 4 */ tmp3 = vec_mladd(cbu, u, y);

/* Calculate rgb values */
/* 1 */ r = vec_sra(tmp1, res);

/* 1 */ tmp2 = vec_sub(y, tmp2);
/* 1 */ tmp2 = vec_add(tmp2, c32);
/* 1 */ g = vec_sra(tmp2, res);

/* 1 */ tmp3 = vec_add(tmp3, c32);
/* 1 */ b = vec_sra(tmp3, res);

/* Bound to 0 <= x <= 255 */
/* 1 */ r = vec_min(r, maxchar);
/* 1 */ g = vec_min(g, maxchar);
/* 1 */ b = vec_min(b, maxchar);
/* 1 */ r = vec_max(r, (vector signed short)zero);
/* 1 */ g = vec_max(g, (vector signed short)zero);
/* 1 */ b = vec_max(b, (vector signed short)zero);

/* Combine r, g and b. */
/* 2 */ rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

/* 2 */ rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

/* 2 */ rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
/* 2 */ rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

/* 1 */ rgb1 = vec_or(alpha, rgb1);
/* 1 */ rgb2 = vec_or(alpha, rgb2);

/* 3 */ vec_stl(rgb1, 0, dp2);

/* 3 */ vec_stl(rgb2, 0, dp2);
/* Increment the YUV data pointers to the next set of pixels. */

/* Move the destination pointers to the next set of pixels. */

/* jump down one line since we are doing 2 at once */

_evas_yv12torgb_diz(yuv, rgb, w, h);
for (i = 0; i < 256; i++)

_v1164[i] = (int)(((float)(i - 16 )) * 1.164);

_v1596[i] = (int)(((float)(i - 128)) * 1.596);
_v813[i] = (int)(((float)(i - 128)) * 0.813);

_v391[i] = (int)(((float)(i - 128)) * 0.391);
_v2018[i] = (int)(((float)(i - 128)) * 2.018);

for (i = -384; i < 640; i++)

_clip_lut[i+384] = i < 0 ? 0 : (i > 255) ? 255 : i;
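/* The tables above bake the usual BT.601 limited-range conversion into
 * per-component lookups, so the raster converters below need only a handful
 * of adds and clips per pixel:
 *
 *   R = clip(1.164 * (Y - 16) + 1.596 * (V - 128))
 *   G = clip(1.164 * (Y - 16) - 0.813 * (V - 128) - 0.391 * (U - 128))
 *   B = clip(1.164 * (Y - 16) + 2.018 * (U - 128))
 *
 * i.e. with y = _v1164[Y], the raster code computes R = LUT_CLIP(y + _v1596[V]),
 * G = LUT_CLIP(y - (_v813[V] + _v391[U])) and B = LUT_CLIP(y + _v2018[U]). */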
_evas_yv12torgb_diz(unsigned char **yuv, unsigned char *rgb, int w, int h)

int y, u, v, r, g, b;
unsigned char *yp1, *yp2, *up, *vp;
unsigned char *dp1, *dp2;
int crv, cbu, cgu, cgv;

/* destination pointers */

crv = CRV; /* 1.596 */
cbu = CBU; /* 2.018 */
cgu = CGU; /* 0.391 */
cgv = CGV; /* 0.813 */

for (yy = 0; yy < h; yy += 2)

up = yuv[h + (yy / 2)];
vp = yuv[h + (h / 2) + (yy / 2)];
for (xx = 0; xx < w; xx += 2)

/* collect u & v for 2x2 pixel block */

/* do the top 2 pixels of the 2x2 block which share u & v */

y = YMUL * ((*yp1++) - 16);
r = LUT_CLIP((y + (crv * v)) >> 16);
g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

y = YMUL * ((*yp1++) - 16);
r = LUT_CLIP((y + (crv * v)) >> 16);
g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

/* do the bottom 2 pixels */

y = YMUL * ((*yp2++) - 16);
r = LUT_CLIP((y + (crv * v)) >> 16);
g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
*((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(r,g,b);

y = YMUL * ((*yp2++) - 16);
r = LUT_CLIP((y + (crv * v)) >> 16);
g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
*((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(r,g,b);

/* jump down one line since we are doing 2 at once */
_evas_yv12torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)

unsigned char *yp1, *yp2, *up, *vp;
unsigned char *dp1, *dp2;

/* destination pointers */

for (yy = 0; yy < h; yy += 2)

up = yuv[h + (yy / 2)];
vp = yuv[h + (h / 2) + (yy / 2)];
for (xx = 0; xx < w; xx += 2)

/* collect u & v for 2x2 pixel block */

vmu = _v813[v] + _v391[u];

/* do the top 2 pixels of the 2x2 block which share u & v */

*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

/* do the bottom 2 pixels */

*((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

*((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

/* jump down one line since we are doing 2 at once */
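/* The 422 path below consumes packed YUY2 style data: two horizontal pixels
 * share one U and one V sample, each 2 pixel group occupies 4 bytes, and
 * _evas_yuy2torgb_raster() therefore advances all of its component pointers
 * by 4 per iteration. Assuming the usual Y0 U0 Y1 V0 byte ordering (an
 * assumption, as the pointer setup is not shown here), one group looks like:
 *
 *   byte 0: Y of pixel 0
 *   byte 1: U shared by both pixels
 *   byte 2: Y of pixel 1
 *   byte 3: V shared by both pixels
 */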
evas_common_convert_yuv_422_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)

if (!initted) _evas_yuv_init();

_evas_yuy2torgb_raster(src, dst, w, h);
_evas_yuy2torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)

unsigned char *yp1, *yp2, *up, *vp;

/* destination pointers */
for (yy = 0; yy < h; yy++)

for (xx = 0; xx < w; xx += 2)

/* collect u & v for the 2 pixel block */

vmu = _v813[v] + _v391[u];

/* do the 2 pixels which share u & v */

*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

*((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

yp1 += 4; yp2 += 4; up += 4; vp += 4;