Unscaled converters for
authorMichael Niedermayer <michaelni@gmx.at>
Thu, 19 Mar 2009 03:45:29 +0000 (03:45 +0000)
committerMichael Niedermayer <michaelni@gmx.at>
Thu, 19 Mar 2009 03:45:29 +0000 (03:45 +0000)
YUYV->YUV420P
YUYV->YUV422P
UYVY->YUV420P
UYVY->YUV422P

Originally committed as revision 28997 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale

libswscale/rgb2rgb.c
libswscale/rgb2rgb.h
libswscale/rgb2rgb_template.c
libswscale/swscale.c

index ad69265c3790d0ac8f1ed7fb548c9b35a0aa01d1..59b8e30a639d1b24b228bf58bda2e7ba425edba6 100644 (file)
@@ -87,6 +87,19 @@ void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *sr
                      long width, long height,
                      long srcStride1, long srcStride2,
                      long srcStride3, long dstStride);
+void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+
 
 #if ARCH_X86 && CONFIG_GPL
 DECLARE_ASM_CONST(8, uint64_t, mmx_null)     = 0x0000000000000000ULL;
index df912c8533444a4699ebd9a9b3641df127f30cd8..3850ef291d9377737fe0bf6fc97f3bd58740a092 100644 (file)
@@ -142,6 +142,20 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint
                             long srcStride1, long srcStride2,
                             long srcStride3, long dstStride);
 
+
+extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride);
+
 void sws_rgb2rgb_init(int flags);
 
 #endif /* SWSCALE_RGB2RGB_H */
index fb4ac23d888bc5d2b291a7fe40a9b00119d74f52..fa6de09a09f2924dc84de43438828dbf5907175d 100644 (file)
@@ -2701,6 +2701,245 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
 #endif
 }
 
+static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
+{
+    dst +=   count;
+    src += 2*count;
+    count= - count;
+
+#if HAVE_MMX
+    if(count <= -16){
+        count += 15;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t"
+            "1:                                \n\t"
+            "movq -30(%1, %0, 2), %%mm0        \n\t"
+            "movq -22(%1, %0, 2), %%mm1        \n\t"
+            "movq -14(%1, %0, 2), %%mm2        \n\t"
+            "movq  -6(%1, %0, 2), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            MOVNTQ"        %%mm0,-15(%2, %0)   \n\t"
+            MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t"
+            "add             $16, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst)
+        );
+        count -= 15;
+    }
+#endif
+    while(count<0){
+        dst[count]= src[2*count];
+        count++;
+    }
+}
+
+static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+#if HAVE_MMX
+    if(count <= -8){
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t"
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "pand          %%mm7, %%mm0        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm2        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+        );
+        count -= 7;
+    }
+#endif
+    while(count<0){
+        dst0[count]= src[4*count+0];
+        dst1[count]= src[4*count+2];
+        count++;
+    }
+}
+
+static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
+{
+    dst0+=   count;
+    dst1+=   count;
+    src += 4*count;
+    count= - count;
+#if HAVE_MMX
+    if(count <= -8){
+        count += 7;
+        __asm__ volatile(
+            "pcmpeqw       %%mm7, %%mm7        \n\t"
+            "psrlw            $8, %%mm7        \n\t"
+            "1:                                \n\t"
+            "movq -28(%1, %0, 4), %%mm0        \n\t"
+            "movq -20(%1, %0, 4), %%mm1        \n\t"
+            "movq -12(%1, %0, 4), %%mm2        \n\t"
+            "movq  -4(%1, %0, 4), %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t"
+            "psrlw            $8, %%mm1        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "psrlw            $8, %%mm3        \n\t"
+            "packuswb      %%mm1, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm2        \n\t"
+            "movq          %%mm0, %%mm1        \n\t"
+            "movq          %%mm2, %%mm3        \n\t"
+            "psrlw            $8, %%mm0        \n\t"
+            "psrlw            $8, %%mm2        \n\t"
+            "pand          %%mm7, %%mm1        \n\t"
+            "pand          %%mm7, %%mm3        \n\t"
+            "packuswb      %%mm2, %%mm0        \n\t"
+            "packuswb      %%mm3, %%mm1        \n\t"
+            MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t"
+            MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t"
+            "add              $8, %0           \n\t"
+            " js 1b                            \n\t"
+            : "+r"(count)
+            : "r"(src), "r"(dst0), "r"(dst1)
+        );
+        count -= 7;
+    }
+#endif
+    while(count<0){
+        dst0[count]= src[4*count+0];
+        dst1[count]= src[4*count+2];
+        count++;
+    }
+}
+
+static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1);
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src, ydst, width);
+        if(!(y&1)){
+            RENAME(extract_odd2)(src, udst, vdst, chromWidth);
+            udst+= chromStride;
+            vdst+= chromStride;
+        }
+
+        src += srcStride;
+        ydst+= lumStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1);
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src, ydst, width);
+        RENAME(extract_odd2)(src, udst, vdst, chromWidth);
+
+        src += srcStride;
+        ydst+= lumStride;
+        udst+= chromStride;
+        vdst+= chromStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1);
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src+1, ydst, width);
+        if(!(y&1)){
+            RENAME(extract_even2)(src, udst, vdst, chromWidth);
+            udst+= chromStride;
+            vdst+= chromStride;
+        }
+
+        src += srcStride;
+        ydst+= lumStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
+static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
+                                      long width, long height,
+                                      long lumStride, long chromStride, long srcStride)
+{
+    long y;
+    const long chromWidth= -((-width)>>1);
+
+    for (y=0; y<height; y++){
+        RENAME(extract_even)(src+1, ydst, width);
+        RENAME(extract_even2)(src, udst, vdst, chromWidth);
+
+        src += srcStride;
+        ydst+= lumStride;
+        udst+= chromStride;
+        vdst+= chromStride;
+    }
+#if HAVE_MMX
+    __asm__(
+        EMMS"       \n\t"
+        SFENCE"     \n\t"
+        ::: "memory"
+        );
+#endif
+}
+
 static inline void RENAME(rgb2rgb_init)(void){
     rgb15to16       = RENAME(rgb15to16);
     rgb15tobgr24    = RENAME(rgb15tobgr24);
@@ -2725,11 +2964,15 @@ static inline void RENAME(rgb2rgb_init)(void){
     yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
     yuv422ptouyvy   = RENAME(yuv422ptouyvy);
     yuy2toyv12      = RENAME(yuy2toyv12);
-//    uyvytoyv12      = RENAME(uyvytoyv12);
 //    yvu9toyv12      = RENAME(yvu9toyv12);
     planar2x        = RENAME(planar2x);
     rgb24toyv12     = RENAME(rgb24toyv12);
     interleaveBytes = RENAME(interleaveBytes);
     vu9_to_vu12     = RENAME(vu9_to_vu12);
     yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
+
+    uyvytoyuv420    = RENAME(uyvytoyuv420);
+    uyvytoyuv422    = RENAME(uyvytoyuv422);
+    yuyvtoyuv420    = RENAME(yuyvtoyuv420);
+    yuyvtoyuv422    = RENAME(yuyvtoyuv422);
 }
index 5974ea566c363cd575df7eef2771081fb2010b53..e1101b47db03d058929012e293291b9875b7aca4 100644 (file)
@@ -1745,6 +1745,50 @@ static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[],
     return srcSliceH;
 }
 
+static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
+static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
+static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
+static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
+    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
+    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
+    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;
+
+    uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);
+
+    return srcSliceH;
+}
+
 static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
     const enum PixelFormat srcFormat= c->srcFormat;
@@ -2399,7 +2443,16 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
                 else if (dstFormat == PIX_FMT_UYVY422)
                     c->swScale= PlanarToUyvyWrapper;
             }
+
+            if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV420P)
+                c->swScale= YUYV2YUV420Wrapper;
+            if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV420P)
+                c->swScale= UYVY2YUV420Wrapper;
         }
+        if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
+            c->swScale= YUYV2YUV422Wrapper;
+        if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
+            c->swScale= UYVY2YUV422Wrapper;
 
 #ifdef COMPILE_ALTIVEC
         if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&