rgb2rgb_template: add MMX/SSE2/AVX-optimized deinterleaveBytes
authorMichael Niedermayer <michaelni@gmx.at>
Tue, 19 Nov 2013 13:57:52 +0000 (14:57 +0100)
committerAnton Khirnov <anton@khirnov.net>
Tue, 21 Jan 2014 17:03:41 +0000 (18:03 +0100)
Signed-off-by: Anton Khirnov <anton@khirnov.net>
libswscale/x86/rgb2rgb_template.c

index c8bbb04..dc3c694 100644 (file)
@@ -1943,6 +1943,30 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 }
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
 
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                         const uint8_t *src, const uint8_t *unused, int w,
+                         uint32_t *unused2);
+static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
+                                      int width, int height, int srcStride,
+                                      int dst1Stride, int dst2Stride)
+{
+    int h;
+
+    for (h = 0; h < height; h++) {
+        RENAME(ff_nv12ToUV)(dst1, dst2, src, NULL, width, NULL);
+        src  += srcStride;
+        dst1 += dst1Stride;
+        dst2 += dst2Stride;
+    }
+    __asm__(
+            EMMS"       \n\t"
+            SFENCE"     \n\t"
+            ::: "memory"
+            );
+}
+#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+
 #if !COMPILE_TEMPLATE_SSE2
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
@@ -2512,4 +2536,7 @@ static av_cold void RENAME(rgb2rgb_init)(void)
 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
     interleaveBytes    = RENAME(interleaveBytes);
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
+    deinterleaveBytes  = RENAME(deinterleaveBytes);
+#endif
 }