fixed yv12toyuy2

author Michael Niedermayer <michaelni@gmx.at>

Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)

committer Michael Niedermayer <michaelni@gmx.at>

Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)
author Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c

index ddb805bbc61cce31132a4a662f6a3811b2ac37f4..5d538ec7a9ac54b8e3a13a69e59c3d8688261b45 100644 (file)
--- a/postproc/rgb2rgb.c
+++ b/postproc/rgb2rgb.c
@@ -291,56 +291,71 @@ void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, cons
  }
  /**
   *
- * num_pixels must be a multiple of 16 for the MMX version
+ * width must be a multiple of 16 for the MMX version
   */
-void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, unsigned num_pixels)
+void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+       int width, int height, int lumStride, int chromStride, int dstStride)
  {
+       int y;
+       const int chromWidth= width>>1;
+       for(y=0; y<height; y++)
+       {
  #ifdef HAVE_MMX
-       asm volatile(
-               "xorl %%eax, %%eax              \n\t"
-               "1:                             \n\t"
-               PREFETCH" 32(%1, %%eax, 2)      \n\t"
-               PREFETCH" 32(%2, %%eax)         \n\t"
-               PREFETCH" 32(%3, %%eax)         \n\t"
-               "movq (%2, %%eax), %%mm0        \n\t" // U(0)
-               "movq %%mm0, %%mm2              \n\t" // U(0)
-               "movq (%3, %%eax), %%mm1        \n\t" // V(0)
-               "punpcklbw %%mm1, %%mm0         \n\t" // UVUV UVUV(0)
-               "punpckhbw %%mm1, %%mm2         \n\t" // UVUV UVUV(8)
-
-               "movq (%1, %%eax,2), %%mm3      \n\t" // Y(0)
-               "movq 8(%1, %%eax,2), %%mm5     \n\t" // Y(8)
-               "movq %%mm3, %%mm4              \n\t" // Y(0)
-               "movq %%mm5, %%mm6              \n\t" // Y(8)
-               "punpcklbw %%mm0, %%mm3         \n\t" // YUYV YUYV(0)
-               "punpckhbw %%mm0, %%mm4         \n\t" // YUYV YUYV(4)
-               "punpcklbw %%mm2, %%mm5         \n\t" // YUYV YUYV(8)
-               "punpckhbw %%mm2, %%mm6         \n\t" // YUYV YUYV(12)
+//FIXME handle 2 lines at once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
+               asm volatile(
+                       "xorl %%eax, %%eax              \n\t"
+                       "1:                             \n\t"
+                       PREFETCH" 32(%1, %%eax, 2)      \n\t"
+                       PREFETCH" 32(%2, %%eax)         \n\t"
+                       PREFETCH" 32(%3, %%eax)         \n\t"
+                       "movq (%2, %%eax), %%mm0        \n\t" // U(0)
+                       "movq %%mm0, %%mm2              \n\t" // U(0)
+                       "movq (%3, %%eax), %%mm1        \n\t" // V(0)
+                       "punpcklbw %%mm1, %%mm0         \n\t" // UVUV UVUV(0)
+                       "punpckhbw %%mm1, %%mm2         \n\t" // UVUV UVUV(8)
  
-               MOVNTQ" %%mm3, (%0, %%eax, 4)   \n\t"
-               MOVNTQ" %%mm4, 8(%0, %%eax, 4)  \n\t"
-               MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
-               MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
+                       "movq (%1, %%eax,2), %%mm3      \n\t" // Y(0)
+                       "movq 8(%1, %%eax,2), %%mm5     \n\t" // Y(8)
+                       "movq %%mm3, %%mm4              \n\t" // Y(0)
+                       "movq %%mm5, %%mm6              \n\t" // Y(8)
+                       "punpcklbw %%mm0, %%mm3         \n\t" // YUYV YUYV(0)
+                       "punpckhbw %%mm0, %%mm4         \n\t" // YUYV YUYV(4)
+                       "punpcklbw %%mm2, %%mm5         \n\t" // YUYV YUYV(8)
+                       "punpckhbw %%mm2, %%mm6         \n\t" // YUYV YUYV(12)
  
-               "addl $8, %%eax                 \n\t"
-               "cmpl %4, %%eax                 \n\t"
-               " jb 1b                         \n\t"
-               EMMS" \n\t"
-               SFENCE
-               ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (num_pixels>>1)
-               : "memory", "%eax"
-       );
+                       MOVNTQ" %%mm3, (%0, %%eax, 4)   \n\t"
+                       MOVNTQ" %%mm4, 8(%0, %%eax, 4)  \n\t"
+                       MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
+                       MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
  
+                       "addl $8, %%eax                 \n\t"
+                       "cmpl %4, %%eax                 \n\t"
+                       " jb 1b                         \n\t"
+                       ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth)
+                       : "%eax"
+               );
  #else
-       int i;
-       num_pixels>>=1;
-       for(i=0; i<num_pixels; i++)
-       {
-               dst[4*i+0] = ysrc[2*i+0];
-               dst[4*i+1] = usrc[i];
-               dst[4*i+2] = ysrc[2*i+1];
-               dst[4*i+3] = vsrc[i];
+               int i;
+               for(i=0; i<chromWidth; i++)
+               {
+                       dst[4*i+0] = ysrc[2*i+0];
+                       dst[4*i+1] = usrc[i];
+                       dst[4*i+2] = ysrc[2*i+1];
+                       dst[4*i+3] = vsrc[i];
+               }
+#endif
+               if(y&1)
+               {
+                       usrc += chromStride;
+                       vsrc += chromStride;
+               }
+               ysrc += lumStride;
+               dst += dstStride;
         }
+#ifdef HAVE_MMX
+asm(    EMMS" \n\t"
+        SFENCE" \n\t"
+        :::"memory");
  #endif
  }
  
@@ -410,4 +425,4 @@ void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                  vdst[i]        = src[4*i+3];
         }
  #endif
-}
-\ No newline at end of file
+}
diff --git a/postproc/rgb2rgb.h b/postproc/rgb2rgb.h

index bfdda8d9e08a45cbbcb683d70ffbe97b83b9ce06..db29be3efe0a32b0e8c97a5d993ed3503865928a 100644 (file)
--- a/postproc/rgb2rgb.h
+++ b/postproc/rgb2rgb.h
@@ -21,8 +21,8 @@ extern void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixel
  extern void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette);
  extern void palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette);
  
-extern void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, unsigned num_pixels);
+extern void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+       int width, int height, int lumStride, int chromStride, int dstStride);
  extern void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, unsigned num_pixels);
  
-
  #endif
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c

index ddb805bbc61cce31132a4a662f6a3811b2ac37f4..5d538ec7a9ac54b8e3a13a69e59c3d8688261b45 100644 (file)
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -291,56 +291,71 @@ void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, cons
  }
  /**
   *
- * num_pixels must be a multiple of 16 for the MMX version
+ * width must be a multiple of 16 for the MMX version
   */
-void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, unsigned num_pixels)
+void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+       int width, int height, int lumStride, int chromStride, int dstStride)
  {
+       int y;
+       const int chromWidth= width>>1;
+       for(y=0; y<height; y++)
+       {
  #ifdef HAVE_MMX
-       asm volatile(
-               "xorl %%eax, %%eax              \n\t"
-               "1:                             \n\t"
-               PREFETCH" 32(%1, %%eax, 2)      \n\t"
-               PREFETCH" 32(%2, %%eax)         \n\t"
-               PREFETCH" 32(%3, %%eax)         \n\t"
-               "movq (%2, %%eax), %%mm0        \n\t" // U(0)
-               "movq %%mm0, %%mm2              \n\t" // U(0)
-               "movq (%3, %%eax), %%mm1        \n\t" // V(0)
-               "punpcklbw %%mm1, %%mm0         \n\t" // UVUV UVUV(0)
-               "punpckhbw %%mm1, %%mm2         \n\t" // UVUV UVUV(8)
-
-               "movq (%1, %%eax,2), %%mm3      \n\t" // Y(0)
-               "movq 8(%1, %%eax,2), %%mm5     \n\t" // Y(8)
-               "movq %%mm3, %%mm4              \n\t" // Y(0)
-               "movq %%mm5, %%mm6              \n\t" // Y(8)
-               "punpcklbw %%mm0, %%mm3         \n\t" // YUYV YUYV(0)
-               "punpckhbw %%mm0, %%mm4         \n\t" // YUYV YUYV(4)
-               "punpcklbw %%mm2, %%mm5         \n\t" // YUYV YUYV(8)
-               "punpckhbw %%mm2, %%mm6         \n\t" // YUYV YUYV(12)
+//FIXME handle 2 lines at once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
+               asm volatile(
+                       "xorl %%eax, %%eax              \n\t"
+                       "1:                             \n\t"
+                       PREFETCH" 32(%1, %%eax, 2)      \n\t"
+                       PREFETCH" 32(%2, %%eax)         \n\t"
+                       PREFETCH" 32(%3, %%eax)         \n\t"
+                       "movq (%2, %%eax), %%mm0        \n\t" // U(0)
+                       "movq %%mm0, %%mm2              \n\t" // U(0)
+                       "movq (%3, %%eax), %%mm1        \n\t" // V(0)
+                       "punpcklbw %%mm1, %%mm0         \n\t" // UVUV UVUV(0)
+                       "punpckhbw %%mm1, %%mm2         \n\t" // UVUV UVUV(8)
  
-               MOVNTQ" %%mm3, (%0, %%eax, 4)   \n\t"
-               MOVNTQ" %%mm4, 8(%0, %%eax, 4)  \n\t"
-               MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
-               MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
+                       "movq (%1, %%eax,2), %%mm3      \n\t" // Y(0)
+                       "movq 8(%1, %%eax,2), %%mm5     \n\t" // Y(8)
+                       "movq %%mm3, %%mm4              \n\t" // Y(0)
+                       "movq %%mm5, %%mm6              \n\t" // Y(8)
+                       "punpcklbw %%mm0, %%mm3         \n\t" // YUYV YUYV(0)
+                       "punpckhbw %%mm0, %%mm4         \n\t" // YUYV YUYV(4)
+                       "punpcklbw %%mm2, %%mm5         \n\t" // YUYV YUYV(8)
+                       "punpckhbw %%mm2, %%mm6         \n\t" // YUYV YUYV(12)
  
-               "addl $8, %%eax                 \n\t"
-               "cmpl %4, %%eax                 \n\t"
-               " jb 1b                         \n\t"
-               EMMS" \n\t"
-               SFENCE
-               ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (num_pixels>>1)
-               : "memory", "%eax"
-       );
+                       MOVNTQ" %%mm3, (%0, %%eax, 4)   \n\t"
+                       MOVNTQ" %%mm4, 8(%0, %%eax, 4)  \n\t"
+                       MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
+                       MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"
  
+                       "addl $8, %%eax                 \n\t"
+                       "cmpl %4, %%eax                 \n\t"
+                       " jb 1b                         \n\t"
+                       ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth)
+                       : "%eax"
+               );
  #else
-       int i;
-       num_pixels>>=1;
-       for(i=0; i<num_pixels; i++)
-       {
-               dst[4*i+0] = ysrc[2*i+0];
-               dst[4*i+1] = usrc[i];
-               dst[4*i+2] = ysrc[2*i+1];
-               dst[4*i+3] = vsrc[i];
+               int i;
+               for(i=0; i<chromWidth; i++)
+               {
+                       dst[4*i+0] = ysrc[2*i+0];
+                       dst[4*i+1] = usrc[i];
+                       dst[4*i+2] = ysrc[2*i+1];
+                       dst[4*i+3] = vsrc[i];
+               }
+#endif
+               if(y&1)
+               {
+                       usrc += chromStride;
+                       vsrc += chromStride;
+               }
+               ysrc += lumStride;
+               dst += dstStride;
         }
+#ifdef HAVE_MMX
+asm(    EMMS" \n\t"
+        SFENCE" \n\t"
+        :::"memory");
  #endif
  }
  
@@ -410,4 +425,4 @@ void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                  vdst[i]        = src[4*i+3];
         }
  #endif
-}
-\ No newline at end of file
+}
author	Michael Niedermayer <michaelni@gmx.at>
	Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)
committer	Michael Niedermayer <michaelni@gmx.at>
	Mon, 5 Nov 2001 18:26:49 +0000 (18:26 +0000)
postproc/rgb2rgb.c		patch \| blob \| history
postproc/rgb2rgb.h		patch \| blob \| history
postproc/rgb2rgb_template.c		patch \| blob \| history