From 74d3583521de27a6c6018a647362a1974f3e2b58 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 13 Apr 2002 00:48:21 +0000
Subject: [PATCH] rgb24->bgr24

Originally committed as revision 5583 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
---
 postproc/rgb2rgb.c          | 20 ++++++++++++++
 postproc/rgb2rgb.h          |  1 +
 postproc/rgb2rgb_template.c | 67 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c
index 7e70858..b1178b8 100644
--- a/postproc/rgb2rgb.c
+++ b/postproc/rgb2rgb.c
@@ -24,6 +24,9 @@ static const uint64_t mask32b  __attribute__((aligned(8))) = 0x000000FF000000FFU
 static const uint64_t mask32g  __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
 static const uint64_t mask32r  __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
 static const uint64_t mask32   __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
+static const uint64_t mask24b  __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL; // keeps bytes 0, 3, 6 (0 = least significant)
+static const uint64_t mask24g  __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL; // keeps bytes 1, 4, 7
+static const uint64_t mask24r  __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL; // keeps bytes 2, 5
 static const uint64_t mask24l  __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
 static const uint64_t mask24h  __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
 static const uint64_t mask24hh  __attribute__((aligned(8))) = 0xffff000000000000ULL;
@@ -316,6 +319,23 @@ void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
 #endif
 }
 
+void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
+{
+#ifdef CAN_COMPILE_X86_ASM
+	// pick the variant matching the gCpuCaps flags, ordered per speed, fastest first
+	if(gCpuCaps.hasMMX2)
+		rgb24tobgr24_MMX2(src, dst, src_size);
+	else if(gCpuCaps.has3DNow)
+		rgb24tobgr24_3DNow(src, dst, src_size);
+	else if(gCpuCaps.hasMMX)
+		rgb24tobgr24_MMX(src, dst, src_size);
+	else
+		rgb24tobgr24_C(src, dst, src_size);
+#else
+		rgb24tobgr24_C(src, dst, src_size); // without CAN_COMPILE_X86_ASM the C version is always used
+#endif
+}
+
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
diff --git a/postproc/rgb2rgb.h b/postproc/rgb2rgb.h
index e5dd3be..244ed1e 100644
--- a/postproc/rgb2rgb.h
+++ b/postproc/rgb2rgb.h
@@ -17,6 +17,7 @@ extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size);
 extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size);
 extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size);
 extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
+extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); // src_size is counted in bytes
 
 
 extern void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette);
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c
index 46f36d8..87493eb 100644
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -571,6 +571,73 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign
 #endif
 }
 
+/** swaps the 1st and 3rd byte of every 3 byte pixel (rgb24 <-> bgr24), src_size is in bytes
+ *  NOTE(review): src_size is presumably a multiple of 3, confirm against the callers */
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
+{
+	int i;
+#ifdef HAVE_MMX
+	int mmx_size= 23 - src_size; // negative as long as a full 24 byte group is left
+	if(mmx_size < 0) asm volatile ( // the loop tests after its body, so skip it for <24 bytes
+		"movq "MANGLE(mask24r)", %%mm5	\n\t"
+		"movq "MANGLE(mask24g)", %%mm6	\n\t"
+		"movq "MANGLE(mask24b)", %%mm7	\n\t"
+		".balign 16			\n\t"
+		"1:				\n\t"
+		PREFETCH" 32(%1, %%eax)		\n\t"
+		"movq   (%1, %%eax), %%mm0	\n\t" // BGR BGR BG
+		"movq   (%1, %%eax), %%mm1	\n\t" // BGR BGR BG
+		"movq  2(%1, %%eax), %%mm2	\n\t" // R BGR BGR B
+		"psllq $16, %%mm0		\n\t" // 00 BGR BGR
+		"pand %%mm5, %%mm0		\n\t"
+		"pand %%mm6, %%mm1		\n\t"
+		"pand %%mm7, %%mm2		\n\t"
+		"por %%mm0, %%mm1		\n\t"
+		"por %%mm2, %%mm1		\n\t"
+		"movq  6(%1, %%eax), %%mm0	\n\t" // BGR BGR BG
+		MOVNTQ" %%mm1,   (%2, %%eax)	\n\t" // RGB RGB RG
+		"movq  8(%1, %%eax), %%mm1	\n\t" // R BGR BGR B
+		"movq 10(%1, %%eax), %%mm2	\n\t" // GR BGR BGR
+		"pand %%mm7, %%mm0		\n\t"
+		"pand %%mm5, %%mm1		\n\t"
+		"pand %%mm6, %%mm2		\n\t"
+		"por %%mm0, %%mm1		\n\t"
+		"por %%mm2, %%mm1		\n\t"
+		"movq 14(%1, %%eax), %%mm0	\n\t" // R BGR BGR B
+		MOVNTQ" %%mm1,  8(%2, %%eax)	\n\t" // B RGB RGB R
+		"movq 16(%1, %%eax), %%mm1	\n\t" // GR BGR BGR
+		"movq 18(%1, %%eax), %%mm2	\n\t" // BGR BGR BG (loads 2 bytes past the group, they are masked out)
+		"pand %%mm6, %%mm0		\n\t"
+		"pand %%mm7, %%mm1		\n\t"
+		"pand %%mm5, %%mm2		\n\t"
+		"por %%mm0, %%mm1		\n\t"
+		"por %%mm2, %%mm1		\n\t"
+		MOVNTQ" %%mm1, 16(%2, %%eax)	\n\t"
+		"addl $24, %%eax		\n\t" // one 24 byte group (8 pixels) done
+		" js 1b				\n\t"
+		: "+a" (mmx_size)
+		: "r" (src-mmx_size), "r"(dst-mmx_size)
+	);
+
+	__asm __volatile(SFENCE:::"memory");
+	__asm __volatile(EMMS:::"memory");
+
+	if(mmx_size==23) return; // finished, nothing is left for the C loop
+	// 23-mmx_size bytes are still unconverted, let the C loop below handle exactly those
+	src+= src_size - (23-mmx_size);
+	dst+= src_size - (23-mmx_size);
+	src_size= 23-mmx_size;
+#endif
+	for(i=0; i<src_size; i+=3)
+	{
+		register int x; // read before any store of this pixel, order matters if src == dst
+		x          = src[i + 2];
+		dst[i + 1] = src[i + 1];
+		dst[i + 2] = src[i + 0];
+		dst[i + 0] = x;
+	}
+}
+
 /**
  *
  * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
-- 
2.7.4