/*
 * vim:ts=8:sw=3:sts=8:noexpandtab:cino=>5n-3f0^-2{2
 */
#include "evas_common.h"
#include "evas_convert_yuv.h"

#if defined BUILD_MMX || defined BUILD_SSE
#if defined HAVE_ALTIVEC_H

#ifdef BUILD_CONVERT_YUV

static void _evas_yuv_init            (void);
static void _evas_yv12torgb_sse       (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_mmx       (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_altivec   (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_raster    (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_diz       (unsigned char **yuv, unsigned char *rgb, int w, int h);
/* calculation float resolution in bits */
/* i.e. RES = 6 is 10.6 fixed point */
/*      RES = 8 is  8.8 fixed point */
/*      RES = 4 is 12.4 fixed point */
/* NB: going above 6 will lead to overflow... :( */
#define BITRES 16
#define RES    6

#define RZ(i)   (i >> (BITRES - RES))
#define FOUR(i) {i, i, i, i}
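
/* Worked example: with BITRES = 16 and RES = 6 above, and assuming the
 * Rec.601 V->R coefficient is defined as CRV = round(1.596 * 65536) = 104595
 * (the coefficient #defines live earlier in this file):
 *
 *   RZ(CRV) = 104595 >> (16 - 6) = 102,   and 102 / 2^6 = 1.59375
 *
 * so the 10.6 code computes r as (y + RZ(CRV) * v) >> RES, trading a little
 * accuracy (1.59375 vs 1.596) for 16-bit-lane multiplies. */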
#if defined BUILD_MMX || defined BUILD_SSE
__attribute__ ((aligned (8))) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_ymul  [4] = FOUR(RZ(YMUL));
__attribute__ ((aligned (8))) const volatile unsigned short _const_128   [4] = FOUR(128);
__attribute__ ((aligned (8))) const volatile unsigned short _const_32    [4] = FOUR(RZ(OFF));
__attribute__ ((aligned (8))) const volatile unsigned short _const_16    [4] = FOUR(16);
__attribute__ ((aligned (8))) const volatile unsigned short _const_ff    [4] = FOUR(-1);

#define CONST_CRVCRV *_const_crvcrv
#define CONST_CBUCBU *_const_cbucbu
#define CONST_CGUCGU *_const_cgucgu
#define CONST_CGVCGV *_const_cgvcgv
#define CONST_YMUL   *_const_ymul
#define CONST_128    *_const_128
#define CONST_32     *_const_32
#define CONST_16     *_const_16
#define CONST_FF     *_const_ff
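
/* For example, FOUR(RZ(CRV)) expands to {102, 102, 102, 102} under the
 * assumptions in the worked example above, so movq_m2r(CONST_CRVCRV, mm7)
 * below loads the 1.596 coefficient replicated across all four 16-bit lanes
 * and a single pmullw then scales four pixels' worth of V samples at once. */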
/* for C non-aligned cleanup */
const int _crv = RZ(CRV); /* 1.596 */
const int _cbu = RZ(CBU); /* 2.018 */
const int _cgu = RZ(CGU); /* 0.391 */
const int _cgv = RZ(CGV); /* 0.813 */
const vector unsigned short res     = AVV(RES);
const vector signed short crv       = AVV(RZ(CRV));
const vector signed short cbu       = AVV(RZ(CBU));
const vector signed short cgu       = AVV(RZ(CGU));
const vector signed short cgv       = AVV(RZ(CGV));
const vector signed short ymul      = AVV(RZ(YMUL));
const vector signed short c128      = AVV(128);
const vector signed short c32       = AVV(RZ(OFF));
const vector signed short c16       = AVV(16);
const vector unsigned char zero     = AVV(0);
const vector signed short maxchar   = AVV(255);
const vector unsigned char pickrg1  = AVV(0, 0x1, 0x11, 0,
const vector unsigned char pickrg2  = AVV(0, 0x9, 0x19, 0,
const vector unsigned char pickrgb1 = AVV(0x3, 0x1, 0x2, 0x11,
                                          0xf, 0xd, 0xe, 0x17);
const vector unsigned char pickrgb2 = AVV(0x3, 0x1, 0x2, 0x19,
                                          0xf, 0xd, 0xe, 0x1f);
/* shortcut speedup lookup-tables */
static short _v1164[256];
static short _v1596[256];
static short _v813[256];
static short _v391[256];
static short _v2018[256];

static unsigned char _clip_lut[1024];
#define LUT_CLIP(i) ((_clip_lut+384)[(i)])
#define CMP_CLIP(i) ((i&256)? (~(i>>10)) : i);
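
/* Usage sketch for the clip table (hypothetical values, for illustration):
 * LUT_CLIP accepts any intermediate sum in -384..639 -- the range filled in
 * by _evas_yuv_init() below -- and clamps it to 0..255:
 *
 *   LUT_CLIP(-20)  == 0
 *   LUT_CLIP(300)  == 255
 *   LUT_CLIP(128)  == 128
 */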
EAPI void
evas_common_convert_yuv_420p_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)
{
   int mmx, sse, sse2;

#if defined BUILD_MMX || defined BUILD_SSE
   evas_common_cpu_can_do(&mmx, &sse, &sse2);

   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
     _evas_yv12torgb_sse(src, dst, w, h);
   else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     _evas_yv12torgb_mmx(src, dst, w, h);
#endif

   if (evas_common_cpu_has_feature(CPU_FEATURE_ALTIVEC))
     _evas_yv12torgb_altivec(src, dst, w, h);

   static int initted = 0;

   if (!initted) _evas_yuv_init();
   initted = 1;
   /* FIXME: diz may be faster sometimes */
   _evas_yv12torgb_raster(src, dst, w, h);
}
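
/* Caller sketch for the converter above (hypothetical names and sizes, for
 * illustration): src holds row pointers -- src[0..h-1] are the Y rows,
 * src[h..h + h/2 - 1] the U rows and src[h + h/2..2h - 1] the V rows,
 * exactly as indexed by the converters below -- and dst is a w * h * 4 byte
 * ARGB buffer:
 *
 *   DATA8 *rows[1024];   // 512 Y rows + 256 U rows + 256 V rows
 *   DATA8 *argb;         // 640 * 512 * 4 bytes
 *   ...point rows[] at the decoded YV12 planes and argb at the output...
 *   evas_common_convert_yuv_420p_601_rgba(rows, argb, 640, 512);
 */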
/* Thanks to Diz for this code. I've munged it a little and turned it into */
/* inline macros. I tried beating it with a different algorithm using MMX */
/* but failed. So here we are. This is the fastest YUV->RGB I know of for */
/* x86. It has an issue in that it doesn't convert colours accurately, so */
/* the image looks a little "yellowy". This is a result of only 10.6 fixed */
/* point resolution, as opposed to 16.16 in the C code. This could be fixed */
/* by processing half the number of pixels per cycle and going up to 32 */
/* bits per element during compute, but it would all but negate the speedup */
/* from MMX, I think :( It might be possible to use SSE and SSE2 here, but */
/* I haven't tried yet. Let's see. */

/* NB: XviD has almost the same code in its assembly YV12->RGB code -- same */
/* algorithm, same constants, same all over, actually -- except it does a */
/* few extra memory accesses that this one doesn't, so in theory this code */
/* should be faster. In the end it's all just an MMX version of the */
/* reference implementation done with fixed point math. */
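
/* Concretely: at 10.6 resolution the 1.596 V coefficient becomes
 * 102/64 = 1.59375 and the 2.018 U coefficient becomes 129/64 = 2.015625,
 * so every pixel carries a small systematic colour error (the "yellowy"
 * cast above), while the C paths keep the full 16.16 constants. */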
static void
_evas_yv12torgb_sse(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_SSE
   int xx, yy;
   register unsigned char *yp1, *up, *vp;
   unsigned char *dp1;

   /* destination pointers */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
        /* plane pointers: Y rows first, then U rows, then V rows */
        yp1 = yuv[yy];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < (w - 7); xx += 8)
          {
             /* expand the loaded u & v bytes to 16-bit lanes */
             punpcklbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm7, mm3);

             /* subtract the 16 luma offset */
             movq_m2r(CONST_16, mm4);
             psubsw_r2r(mm4, mm0);
             psubsw_r2r(mm4, mm1);

             /* subtract the 128 chroma offset */
             movq_m2r(CONST_128, mm5);
             psubsw_r2r(mm5, mm2);
             psubsw_r2r(mm5, mm3);

             /* scale luma by the 1.164 coefficient */
             movq_m2r(CONST_YMUL, mm4);
             pmullw_r2r(mm4, mm0);
             pmullw_r2r(mm4, mm1);

             /* chroma coefficient products: 1.596*v, 2.018*u, 0.391*u, 0.813*v */
             movq_m2r(CONST_CRVCRV, mm7);
             pmullw_r2r(mm3, mm7);
             movq_m2r(CONST_CBUCBU, mm6);
             pmullw_r2r(mm2, mm6);
             movq_m2r(CONST_CGUCGU, mm5);
             pmullw_r2r(mm2, mm5);
             movq_m2r(CONST_CGVCGV, mm4);
             pmullw_r2r(mm3, mm4);

             paddsw_r2r(mm7, mm2);
             paddsw_r2r(mm1, mm7);

             packuswb_r2r(mm7, mm2);

             punpckhbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm3, mm7);

             psubsw_r2r(mm5, mm3);
             psubsw_r2r(mm4, mm3);
             paddsw_m2r(CONST_32, mm3);

             psubsw_r2r(mm5, mm7);
             psubsw_r2r(mm4, mm7);
             paddsw_m2r(CONST_32, mm7);

             packuswb_r2r(mm7, mm3);

             punpckhbw_r2r(mm7, mm3);
             punpcklbw_r2r(mm4, mm7);

             movq_m2r(CONST_32, mm4);
             paddsw_r2r(mm6, mm0);
             paddsw_r2r(mm6, mm1);
             paddsw_r2r(mm4, mm0);
             paddsw_r2r(mm4, mm1);

             packuswb_r2r(mm1, mm0);

             punpckhbw_r2r(mm7, mm0);
             punpcklbw_r2r(mm5, mm7);

             movq_m2r(CONST_FF, mm1);

             /* interleave the saturated channels into 32-bit ARGB pixels */
             punpckhbw_r2r(mm3, mm2);
             punpcklbw_r2r(mm6, mm7);
             punpckhbw_r2r(mm1, mm0);
             punpcklbw_r2r(mm1, mm5);

             punpckhwd_r2r(mm5, mm7);
             punpcklwd_r2r(mm5, mm1);

             punpckhwd_r2r(mm0, mm2);
             punpcklwd_r2r(mm0, mm4);

             /* stream 8 ARGB pixels (32 bytes) out with non-temporal stores */
             movntq_r2m(mm1, *(dp1));
             movntq_r2m(mm7, *(dp1 + 8));
             movntq_r2m(mm4, *(dp1 + 16));
             movntq_r2m(mm2, *(dp1 + 24));
          }
        /* clean up pixels that aren't a multiple of 8 wide */
          {
             int y, u, v, r, g, b;

             for (; xx < w; xx += 2)
               {
                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = (y + (_crv * v)) >> RES;
                  g = (y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES;
                  b = (y + (_cbu * u) + RZ(OFF)) >> RES;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = (y + (_crv * v)) >> RES;
                  g = (y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES;
                  b = (y + (_cbu * u) + RZ(OFF)) >> RES;
               }
          }
     }
#else
   _evas_yv12torgb_mmx(yuv, rgb, w, h);
#endif
}
static void
_evas_yv12torgb_mmx(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_MMX
   int xx, yy;
   register unsigned char *yp1, *up, *vp;
   unsigned char *dp1;

   /* destination pointers */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
        yp1 = yuv[yy];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < (w - 7); xx += 8)
          {
             /* same pipeline as the SSE variant above */
             punpcklbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm7, mm3);

             movq_m2r(CONST_16, mm4);
             psubsw_r2r(mm4, mm0);
             psubsw_r2r(mm4, mm1);

             movq_m2r(CONST_128, mm5);
             psubsw_r2r(mm5, mm2);
             psubsw_r2r(mm5, mm3);

             movq_m2r(CONST_YMUL, mm4);
             pmullw_r2r(mm4, mm0);
             pmullw_r2r(mm4, mm1);

             movq_m2r(CONST_CRVCRV, mm7);
             pmullw_r2r(mm3, mm7);
             movq_m2r(CONST_CBUCBU, mm6);
             pmullw_r2r(mm2, mm6);
             movq_m2r(CONST_CGUCGU, mm5);
             pmullw_r2r(mm2, mm5);
             movq_m2r(CONST_CGVCGV, mm4);
             pmullw_r2r(mm3, mm4);

             paddsw_r2r(mm7, mm2);
             paddsw_r2r(mm1, mm7);

             packuswb_r2r(mm7, mm2);

             punpckhbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm3, mm7);

             psubsw_r2r(mm5, mm3);
             psubsw_r2r(mm4, mm3);
             paddsw_m2r(CONST_32, mm3);

             psubsw_r2r(mm5, mm7);
             psubsw_r2r(mm4, mm7);
             paddsw_m2r(CONST_32, mm7);

             packuswb_r2r(mm7, mm3);

             punpckhbw_r2r(mm7, mm3);
             punpcklbw_r2r(mm4, mm7);

             movq_m2r(CONST_32, mm4);
             paddsw_r2r(mm6, mm0);
             paddsw_r2r(mm6, mm1);
             paddsw_r2r(mm4, mm0);
             paddsw_r2r(mm4, mm1);

             packuswb_r2r(mm1, mm0);

             punpckhbw_r2r(mm7, mm0);
             punpcklbw_r2r(mm5, mm7);

             movq_m2r(CONST_FF, mm1);

             punpckhbw_r2r(mm3, mm2);
             punpcklbw_r2r(mm6, mm7);
             punpckhbw_r2r(mm1, mm0);
             punpcklbw_r2r(mm1, mm5);

             punpckhwd_r2r(mm5, mm7);
             punpcklwd_r2r(mm5, mm1);

             punpckhwd_r2r(mm0, mm2);
             punpcklwd_r2r(mm0, mm4);

             /* plain movq stores -- movntq needs the SSE extensions */
             movq_r2m(mm1, *(dp1));
             movq_r2m(mm7, *(dp1 + 8));
             movq_r2m(mm4, *(dp1 + 16));
             movq_r2m(mm2, *(dp1 + 24));
          }
        /* clean up pixels that aren't a multiple of 8 wide */
          {
             int y, u, v, r, g, b;

             for (; xx < w; xx += 2)
               {
                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = (y + (_crv * v)) >> RES;
                  g = (y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES;
                  b = (y + (_cbu * u) + RZ(OFF)) >> RES;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = (y + (_crv * v)) >> RES;
                  g = (y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES;
                  b = (y + (_cbu * u) + RZ(OFF)) >> RES;
               }
          }
     }
#else
   _evas_yv12torgb_raster(yuv, rgb, w, h);
#endif
}
static void
_evas_yv12torgb_altivec(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_ALTIVEC /* guard name assumed, mirroring BUILD_MMX/BUILD_SSE above */
   int xx, yy;
   int w2, h2;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;
   vector signed short y, u, v;
   vector signed short r, g, b;
   vector signed short tmp1, tmp2, tmp3;
   vector unsigned char yperm, uperm, vperm, rgb1, rgb2;
   vector unsigned char alpha;

   /* handy halved w & h */
   w2 = w / 2;
   h2 = h / 2;

   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   /* build an alpha pattern with 0xff in the first byte of each 32-bit pixel */
   alpha = vec_mergeh((vector unsigned char)AVV(255), zero);
   alpha = (vector unsigned char)vec_mergeh((vector unsigned short)alpha,
                                            (vector unsigned short)zero);

   for (yy = 0; yy < h2; yy++)
     {
        for (xx = 0; xx < w2; xx += 4)
          {
             /*
              * Load 4 y and 4 u & v pixels for the 8x2 pixel block.
              */
             /* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);
             /* 3 */ tmp1 = (vector signed short)vec_lde(0, (unsigned int *)up);
             /* 3 */ tmp2 = (vector signed short)vec_lde(0, (unsigned int *)vp);

             /* Prepare for aligning the data in their vectors */
             /* 3 */ yperm = vec_lvsl(0, yp1);
             /* 3 */ uperm = vec_lvsl(0, up);
             /* 3 */ vperm = vec_lvsl(0, vp);

             /* Save y and load the next 4 y pixels for a total of 8 */
             /* 2 */ y = vec_perm(tmp3, tmp3, yperm);
             /* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);

             /* Setup and calculate the 4 u pixels */
             /* 2 */ tmp1 = vec_perm(tmp1, tmp1, uperm);
             /* 2 */ tmp2 = vec_perm(tmp2, tmp2, vperm);

             /* Avoid dependency stalls on yperm and calculate the 4 u values */
             /* 3 */ yperm = vec_lvsr(12, yp1);
             /* 1 */ tmp1 = (vector signed short)vec_mergeh((vector unsigned char)tmp1,
                                                            (vector unsigned char)tmp1);
             /* 1 */ u = (vector signed short)vec_mergeh(zero,
                                                         (vector unsigned char)tmp1);

             /* 1 */ u = vec_sub(u, c128);
             /* 2 */ tmp3 = vec_perm(tmp3, tmp3, yperm);

             /* Setup and calculate the 4 v values */
             /* 1 */ tmp2 = (vector signed short)vec_mergeh((vector unsigned char)tmp2,
                                                            (vector unsigned char)tmp2);
             /* 1 */ v = (vector signed short)vec_mergeh(zero,
                                                         (vector unsigned char)tmp2);
             /* 4 */ tmp2 = vec_mladd(cgu, u, (vector signed short)zero);
             /* 1 */ v = vec_sub(v, c128);

             /* Move the data into y and start loading the next 4 pixels */
             /* 1 */ y = (vector signed short)vec_mergeh(zero,
                                                         (vector unsigned char)y);
             /* 1 */ tmp3 = (vector signed short)vec_mergeh(zero,
                                                            (vector unsigned char)tmp3);
             /* 1 */ y = vec_or(y, tmp3);

             /* Finish calculating y */
             /* 1 */ y = vec_sub(y, c16);
             /* 4 */ y = vec_mladd(ymul, y, (vector signed short)zero);

             /* Perform non-dependent multiplies first. */
             /* 4 */ tmp1 = vec_mladd(crv, v, y);
             /* 4 */ tmp2 = vec_mladd(cgv, v, tmp2);
             /* 4 */ tmp3 = vec_mladd(cbu, u, y);

             /* Calculate rgb values */
             /* 1 */ r = vec_sra(tmp1, res);

             /* 1 */ tmp2 = vec_sub(y, tmp2);
             /* 1 */ tmp2 = vec_add(tmp2, c32);
             /* 1 */ g = vec_sra(tmp2, res);

             /* 1 */ tmp3 = vec_add(tmp3, c32);
             /* 1 */ b = vec_sra(tmp3, res);

             /* Bound to 0 <= x <= 255 */
             /* 1 */ r = vec_min(r, maxchar);
             /* 1 */ g = vec_min(g, maxchar);
             /* 1 */ b = vec_min(b, maxchar);
             /* 1 */ r = vec_max(r, (vector signed short)zero);
             /* 1 */ g = vec_max(g, (vector signed short)zero);
             /* 1 */ b = vec_max(b, (vector signed short)zero);

             /* Combine r, g and b. */
             /* 2 */ rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
             /* 2 */ rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

             /* 2 */ rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
             /* 2 */ rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

             /* 1 */ rgb1 = vec_or(alpha, rgb1);
             /* 1 */ rgb2 = vec_or(alpha, rgb2);

             /* 3 */ vec_stl(rgb1, 0, dp1);
             /* 3 */ vec_stl(rgb2, 0, dp1);

             /*
              * Begin the second row calculations
              */

             /*
              * Load 4 y pixels for the 8x2 pixel block.
              */
             /* 3 */ yperm = vec_lvsl(0, yp2);
             /* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);

             /* Save y and load the next 4 y pixels for a total of 8 */
             /* 2 */ y = vec_perm(tmp3, tmp3, yperm);
             /* 3 */ yperm = vec_lvsr(12, yp2);
             /* 3 */ tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);
             /* 1 */ y = (vector signed short)vec_mergeh(zero,
                                                         (vector unsigned char)y);

             /* Avoid dependency stalls on yperm */
             /* 2 */ tmp3 = vec_perm(tmp3, tmp3, yperm);
             /* 1 */ tmp3 = (vector signed short)vec_mergeh(zero,
                                                            (vector unsigned char)tmp3);
             /* 1 */ y = vec_or(y, tmp3);

             /* Start the calculation for g */
             /* 4 */ tmp2 = vec_mladd(cgu, u, (vector signed short)zero);

             /* Finish calculating y */
             /* 1 */ y = vec_sub(y, c16);
             /* 4 */ y = vec_mladd(ymul, y, (vector signed short)zero);

             /* Perform non-dependent multiplies first. */
             /* 4 */ tmp2 = vec_mladd(cgv, v, tmp2);
             /* 4 */ tmp1 = vec_mladd(crv, v, y);
             /* 4 */ tmp3 = vec_mladd(cbu, u, y);

             /* Calculate rgb values */
             /* 1 */ r = vec_sra(tmp1, res);

             /* 1 */ tmp2 = vec_sub(y, tmp2);
             /* 1 */ tmp2 = vec_add(tmp2, c32);
             /* 1 */ g = vec_sra(tmp2, res);

             /* 1 */ tmp3 = vec_add(tmp3, c32);
             /* 1 */ b = vec_sra(tmp3, res);

             /* Bound to 0 <= x <= 255 */
             /* 1 */ r = vec_min(r, maxchar);
             /* 1 */ g = vec_min(g, maxchar);
             /* 1 */ b = vec_min(b, maxchar);
             /* 1 */ r = vec_max(r, (vector signed short)zero);
             /* 1 */ g = vec_max(g, (vector signed short)zero);
             /* 1 */ b = vec_max(b, (vector signed short)zero);

             /* Combine r, g and b. */
             /* 2 */ rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
             /* 2 */ rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,

             /* 2 */ rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
             /* 2 */ rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

             /* 1 */ rgb1 = vec_or(alpha, rgb1);
             /* 1 */ rgb2 = vec_or(alpha, rgb2);

             /* 3 */ vec_stl(rgb1, 0, dp2);
             /* 3 */ vec_stl(rgb2, 0, dp2);

             /* Increment the YUV data pointers to the next set of pixels. */

             /* Move the destination pointers to the next set of pixels. */
          }

        /* jump down one line since we are doing 2 at once */
     }
#else
   _evas_yv12torgb_diz(yuv, rgb, w, h);
#endif
}
static void
_evas_yuv_init(void)
{
   int i;

   for (i = 0; i < 256; i++)
     {
        _v1164[i] = (int)(((float)(i - 16))  * 1.164);

        _v1596[i] = (int)(((float)(i - 128)) * 1.596);
        _v813[i]  = (int)(((float)(i - 128)) * 0.813);

        _v391[i]  = (int)(((float)(i - 128)) * 0.391);
        _v2018[i] = (int)(((float)(i - 128)) * 2.018);
     }

   for (i = -384; i < 640; i++)
     {
        _clip_lut[i + 384] = i < 0 ? 0 : (i > 255) ? 255 : i;
     }
}
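
/* How the tables are meant to combine per pixel -- a sketch of the raster
 * path (see _evas_yv12torgb_raster further below for the real statements):
 *
 *   vmu = _v813[v] + _v391[u];            // shared green term for a 2x2 block
 *   r = LUT_CLIP(_v1164[y] + _v1596[v]);  // 1.164*(y-16) + 1.596*(v-128)
 *   g = LUT_CLIP(_v1164[y] - vmu);
 *   b = LUT_CLIP(_v1164[y] + _v2018[u]);
 */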
static void
_evas_yv12torgb_diz(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
   int xx, yy;
   int y, u, v, r, g, b;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;
   int crv, cbu, cgu, cgv;

   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   crv = CRV; /* 1.596 */
   cbu = CBU; /* 2.018 */
   cgu = CGU; /* 0.391 */
   cgv = CGV; /* 0.813 */

   for (yy = 0; yy < h; yy += 2)
     {
        yp1 = yuv[yy];
        yp2 = yuv[yy + 1];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < w; xx += 2)
          {
             /* collect u & v for the 2x2 pixel block */

             /* do the top 2 pixels of the 2x2 block which share u & v */
             y = YMUL * ((*yp1++) - 16);
             r = (y + (crv * v)) >> 16;
             g = (y - (cgu * u) - (cgv * v) + OFF) >> 16;
             b = (y + (cbu * u) + OFF) >> 16;

             y = YMUL * ((*yp1++) - 16);
             r = (y + (crv * v)) >> 16;
             g = (y - (cgu * u) - (cgv * v) + OFF) >> 16;
             b = (y + (cbu * u) + OFF) >> 16;

             /* do the bottom 2 pixels */
             y = YMUL * ((*yp2++) - 16);
             r = (y + (crv * v)) >> 16;
             g = (y - (cgu * u) - (cgv * v) + OFF) >> 16;
             b = (y + (cbu * u) + OFF) >> 16;

             y = YMUL * ((*yp2++) - 16);
             r = (y + (crv * v)) >> 16;
             g = (y - (cgu * u) - (cgv * v) + OFF) >> 16;
             b = (y + (cbu * u) + OFF) >> 16;
          }

        /* jump down one line since we are doing 2 at once */
     }
}
static void
_evas_yv12torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
   int xx, yy;
   int y, u, v, r, g, b;
   int vmu;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;

   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   for (yy = 0; yy < h; yy += 2)
     {
        yp1 = yuv[yy];
        yp2 = yuv[yy + 1];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < w; xx += 2)
          {
             /* collect u & v for the 2x2 pixel block */

             vmu = _v813[v] + _v391[u];

             /* do the top 2 pixels of the 2x2 block which share u & v */

             /* do the bottom 2 pixels */
          }

        /* jump down one line since we are doing 2 at once */
     }
}