yuy2toyv12 in MMX
author Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Nov 2001 02:25:30 +0000 (02:25 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Nov 2001 02:25:30 +0000 (02:25 +0000)
Originally committed as revision 2705 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/rgb2rgb.c
postproc/rgb2rgb_template.c

index d2875b6..166557e 100644 (file)
@@ -317,6 +317,60 @@ void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int n
 
 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int num_pixels)
 {
+#ifdef HAVE_MMX
+       asm volatile(
+               "xorl %%eax, %%eax              \n\t"
+               "pcmpeqw %%mm7, %%mm7           \n\t"
+               "psrlw $8, %%mm7                \n\t" // FF,00,FF,00...
+               "1:                             \n\t"
+               PREFETCH" 64(%0, %%eax, 4)      \n\t"
+               "movq (%0, %%eax, 4), %%mm0     \n\t" // YUYV YUYV(0)
+               "movq 8(%0, %%eax, 4), %%mm1    \n\t" // YUYV YUYV(4)
+               "movq %%mm0, %%mm2              \n\t" // YUYV YUYV(0)
+               "movq %%mm1, %%mm3              \n\t" // YUYV YUYV(4)
+               "psrlw $8, %%mm0                \n\t" // U0V0 U0V0(0)
+               "psrlw $8, %%mm1                \n\t" // U0V0 U0V0(4)
+               "pand %%mm7, %%mm2              \n\t" // Y0Y0 Y0Y0(0)
+               "pand %%mm7, %%mm3              \n\t" // Y0Y0 Y0Y0(4)
+               "packuswb %%mm1, %%mm0          \n\t" // UVUV UVUV(0)
+               "packuswb %%mm3, %%mm2          \n\t" // YYYY YYYY(0)
+
+               MOVNTQ" %%mm2, (%1, %%eax, 2)   \n\t"
+
+               "movq 16(%0, %%eax, 4), %%mm1   \n\t" // YUYV YUYV(8)
+               "movq 24(%0, %%eax, 4), %%mm2   \n\t" // YUYV YUYV(12)
+               "movq %%mm1, %%mm3              \n\t" // YUYV YUYV(8)
+               "movq %%mm2, %%mm4              \n\t" // YUYV YUYV(12)
+               "psrlw $8, %%mm1                \n\t" // U0V0 U0V0(8)
+               "psrlw $8, %%mm2                \n\t" // U0V0 U0V0(12)
+               "pand %%mm7, %%mm3              \n\t" // Y0Y0 Y0Y0(8)
+               "pand %%mm7, %%mm4              \n\t" // Y0Y0 Y0Y0(12)
+               "packuswb %%mm2, %%mm1          \n\t" // UVUV UVUV(8)
+               "packuswb %%mm4, %%mm3          \n\t" // YYYY YYYY(8)
+
+               MOVNTQ" %%mm3, 8(%1, %%eax, 2)  \n\t"
+
+               "movq %%mm0, %%mm2              \n\t" // UVUV UVUV(0)
+               "movq %%mm1, %%mm3              \n\t" // UVUV UVUV(8)
+               "psrlw $8, %%mm0                \n\t" // V0V0 V0V0(0)
+               "psrlw $8, %%mm1                \n\t" // V0V0 V0V0(8)
+               "pand %%mm7, %%mm2              \n\t" // U0U0 U0U0(0)
+               "pand %%mm7, %%mm3              \n\t" // U0U0 U0U0(8)
+               "packuswb %%mm1, %%mm0          \n\t" // VVVV VVVV(0)
+               "packuswb %%mm3, %%mm2          \n\t" // UUUU UUUU(0)
+
+               MOVNTQ" %%mm0, (%3, %%eax)      \n\t"
+               MOVNTQ" %%mm2, (%2, %%eax)      \n\t"
+
+               "addl $8, %%eax                 \n\t"
+               "cmpl %4, %%eax                 \n\t"
+               " jb 1b                         \n\t"
+               EMMS" \n\t"
+               SFENCE
+               ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (num_pixels>>1)
+               : "memory", "%eax"
+       );
+#else
        int i;
        num_pixels>>=1;
        for(i=0; i<num_pixels; i++)
@@ -326,4 +380,5 @@ void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int n
                 ydst[2*i+1]    = src[4*i+2];
                 vdst[i]        = src[4*i+3];
        }
+#endif
 }
\ No newline at end of file
index d2875b6..166557e 100644 (file)
@@ -317,6 +317,60 @@ void yv12toyuy2(uint8_t *ysrc, uint8_t *usrc, uint8_t *vsrc, uint8_t *dst, int n
 
 void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int num_pixels)
 {
+#ifdef HAVE_MMX
+       asm volatile(
+               "xorl %%eax, %%eax              \n\t"
+               "pcmpeqw %%mm7, %%mm7           \n\t"
+               "psrlw $8, %%mm7                \n\t" // FF,00,FF,00...
+               "1:                             \n\t"
+               PREFETCH" 64(%0, %%eax, 4)      \n\t"
+               "movq (%0, %%eax, 4), %%mm0     \n\t" // YUYV YUYV(0)
+               "movq 8(%0, %%eax, 4), %%mm1    \n\t" // YUYV YUYV(4)
+               "movq %%mm0, %%mm2              \n\t" // YUYV YUYV(0)
+               "movq %%mm1, %%mm3              \n\t" // YUYV YUYV(4)
+               "psrlw $8, %%mm0                \n\t" // U0V0 U0V0(0)
+               "psrlw $8, %%mm1                \n\t" // U0V0 U0V0(4)
+               "pand %%mm7, %%mm2              \n\t" // Y0Y0 Y0Y0(0)
+               "pand %%mm7, %%mm3              \n\t" // Y0Y0 Y0Y0(4)
+               "packuswb %%mm1, %%mm0          \n\t" // UVUV UVUV(0)
+               "packuswb %%mm3, %%mm2          \n\t" // YYYY YYYY(0)
+
+               MOVNTQ" %%mm2, (%1, %%eax, 2)   \n\t"
+
+               "movq 16(%0, %%eax, 4), %%mm1   \n\t" // YUYV YUYV(8)
+               "movq 24(%0, %%eax, 4), %%mm2   \n\t" // YUYV YUYV(12)
+               "movq %%mm1, %%mm3              \n\t" // YUYV YUYV(8)
+               "movq %%mm2, %%mm4              \n\t" // YUYV YUYV(12)
+               "psrlw $8, %%mm1                \n\t" // U0V0 U0V0(8)
+               "psrlw $8, %%mm2                \n\t" // U0V0 U0V0(12)
+               "pand %%mm7, %%mm3              \n\t" // Y0Y0 Y0Y0(8)
+               "pand %%mm7, %%mm4              \n\t" // Y0Y0 Y0Y0(12)
+               "packuswb %%mm2, %%mm1          \n\t" // UVUV UVUV(8)
+               "packuswb %%mm4, %%mm3          \n\t" // YYYY YYYY(8)
+
+               MOVNTQ" %%mm3, 8(%1, %%eax, 2)  \n\t"
+
+               "movq %%mm0, %%mm2              \n\t" // UVUV UVUV(0)
+               "movq %%mm1, %%mm3              \n\t" // UVUV UVUV(8)
+               "psrlw $8, %%mm0                \n\t" // V0V0 V0V0(0)
+               "psrlw $8, %%mm1                \n\t" // V0V0 V0V0(8)
+               "pand %%mm7, %%mm2              \n\t" // U0U0 U0U0(0)
+               "pand %%mm7, %%mm3              \n\t" // U0U0 U0U0(8)
+               "packuswb %%mm1, %%mm0          \n\t" // VVVV VVVV(0)
+               "packuswb %%mm3, %%mm2          \n\t" // UUUU UUUU(0)
+
+               MOVNTQ" %%mm0, (%3, %%eax)      \n\t"
+               MOVNTQ" %%mm2, (%2, %%eax)      \n\t"
+
+               "addl $8, %%eax                 \n\t"
+               "cmpl %4, %%eax                 \n\t"
+               " jb 1b                         \n\t"
+               EMMS" \n\t"
+               SFENCE
+               ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (num_pixels>>1)
+               : "memory", "%eax"
+       );
+#else
        int i;
        num_pixels>>=1;
        for(i=0; i<num_pixels; i++)
@@ -326,4 +380,5 @@ void yuy2toyv12(uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int n
                 ydst[2*i+1]    = src[4*i+2];
                 vdst[i]        = src[4*i+3];
        }
+#endif
 }
\ No newline at end of file