yv12->uyvy MMX
authorMichael Niedermayer <michaelni@gmx.at>
Fri, 10 Oct 2003 00:19:16 +0000 (00:19 +0000)
committerMichael Niedermayer <michaelni@gmx.at>
Fri, 10 Oct 2003 00:19:16 +0000 (00:19 +0000)
Originally committed as revision 11073 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/rgb2rgb_template.c

index acb53ea..eda2ccc 100644 (file)
@@ -1576,6 +1576,44 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
        const unsigned chromWidth= width>>1;
        for(y=0; y<height; y++)
        {
+#ifdef HAVE_MMX
+//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
+               asm volatile(
+                       "xorl %%eax, %%eax              \n\t"
+                       ".balign 16                     \n\t"
+                       "1:                             \n\t"
+                       PREFETCH" 32(%1, %%eax, 2)      \n\t"
+                       PREFETCH" 32(%2, %%eax)         \n\t"
+                       PREFETCH" 32(%3, %%eax)         \n\t"
+                       "movq (%2, %%eax), %%mm0        \n\t" // U(0)
+                       "movq %%mm0, %%mm2              \n\t" // U(0)
+                       "movq (%3, %%eax), %%mm1        \n\t" // V(0)
+                       "punpcklbw %%mm1, %%mm0         \n\t" // UVUV UVUV(0)
+                       "punpckhbw %%mm1, %%mm2         \n\t" // UVUV UVUV(8)
+
+                       "movq (%1, %%eax,2), %%mm3      \n\t" // Y(0)
+                       "movq 8(%1, %%eax,2), %%mm5     \n\t" // Y(8)
+                       "movq %%mm0, %%mm4              \n\t" // Y(0)
+                       "movq %%mm2, %%mm6              \n\t" // Y(8)
+                       "punpcklbw %%mm3, %%mm0         \n\t" // YUYV YUYV(0)
+                       "punpckhbw %%mm3, %%mm4         \n\t" // YUYV YUYV(4)
+                       "punpcklbw %%mm5, %%mm2         \n\t" // YUYV YUYV(8)
+                       "punpckhbw %%mm5, %%mm6         \n\t" // YUYV YUYV(12)
+
+                       MOVNTQ" %%mm0, (%0, %%eax, 4)   \n\t"
+                       MOVNTQ" %%mm4, 8(%0, %%eax, 4)  \n\t"
+                       MOVNTQ" %%mm2, 16(%0, %%eax, 4) \n\t"
+                       MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
+
+                       "addl $8, %%eax                 \n\t"
+                       "cmpl %4, %%eax                 \n\t"
+                       " jb 1b                         \n\t"
+                       ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
+                       : "%eax"
+               );
+#else
+//FIXME adapt the alpha asm code from yv12->yuy2
+
 #if __WORDSIZE >= 64
                int i;
                uint64_t *ldst = (uint64_t *) dst;
@@ -1603,6 +1641,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
                        vc++;
                }
 #endif
+#endif
                if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
                {
                        usrc += chromStride;
@@ -1611,6 +1650,11 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
                ysrc += lumStride;
                dst += dstStride;
        }
+#ifdef HAVE_MMX
+asm(    EMMS" \n\t"
+        SFENCE" \n\t"
+        :::"memory");
+#endif
 }
 
 /**