From 7a8f7c2f1bdd9acbc712f644a61b301b54a6629e Mon Sep 17 00:00:00 2001 From: Armin Novak Date: Fri, 25 Nov 2016 12:00:41 +0100 Subject: [PATCH] Added optimized pixel write functions for colors. --- libfreerdp/primitives/prim_YCoCg.c | 78 +-------------- libfreerdp/primitives/prim_YUV.c | 133 ++++++------------------- libfreerdp/primitives/prim_colors.c | 180 ++++++++++++++++++++++++++++++---- libfreerdp/primitives/prim_internal.h | 75 ++++++++++++++ 4 files changed, 266 insertions(+), 200 deletions(-) diff --git a/libfreerdp/primitives/prim_YCoCg.c b/libfreerdp/primitives/prim_YCoCg.c index 33ca3a1..f45477a 100644 --- a/libfreerdp/primitives/prim_YCoCg.c +++ b/libfreerdp/primitives/prim_YCoCg.c @@ -32,81 +32,6 @@ #endif /* !MINMAX */ /* ------------------------------------------------------------------------- */ -static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, - BYTE R, BYTE G, BYTE B, BYTE A) -{ - dst[0] = B; - dst[1] = G; - dst[2] = R; - dst[3] = A; - return dst + formatSize; -} - -static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, - BYTE R, BYTE G, BYTE B, BYTE A) -{ - dst[0] = R; - dst[1] = G; - dst[2] = B; - dst[3] = A; - return dst + formatSize; -} - -static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, - BYTE R, BYTE G, BYTE B, BYTE A) -{ - dst[0] = A; - dst[1] = B; - dst[2] = G; - dst[3] = R; - return dst + formatSize; -} - -static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, - BYTE R, BYTE G, BYTE B, BYTE A) -{ - dst[0] = A; - dst[1] = R; - dst[2] = G; - dst[3] = B; - return dst + formatSize; -} - -static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, - BYTE R, BYTE G, BYTE B, BYTE A) -{ - UINT32 color = GetColor(format, R, G, B, A); - WriteColor(dst, format, color); - return dst + formatSize; -} - -typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE); - -static INLINE fkt_writePixel getWriteFunction(DWORD format) -{ - switch (format) - { - case PIXEL_FORMAT_ARGB32: - case PIXEL_FORMAT_XRGB32: - return writePixelXRGB; - - case PIXEL_FORMAT_ABGR32: - case PIXEL_FORMAT_XBGR32: - return writePixelXBGR; - - case PIXEL_FORMAT_RGBA32: - case PIXEL_FORMAT_RGBX32: - return writePixelRGBX; - - case PIXEL_FORMAT_BGRA32: - case PIXEL_FORMAT_BGRX32: - return writePixelBGRX; - - default: - return writePixelGeneric; - } -} - static pstatus_t general_YCoCgToRGB_8u_AC4R( const BYTE* pSrc, INT32 srcStep, BYTE* pDst, UINT32 DstFormat, INT32 dstStep, @@ -120,7 +45,7 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R( const BYTE* sptr = pSrc; INT16 Cg, Co, Y, T, R, G, B; const DWORD formatSize = GetBytesPerPixel(DstFormat); - fkt_writePixel writePixel = getWriteFunction(DstFormat); + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); int cll = shift - 1; /* -1 builds in the /2's */ UINT32 srcPad = srcStep - (width * 4); UINT32 dstPad = dstStep - (width * formatSize); @@ -129,7 +54,6 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R( { for (x = 0; x < width; x++) { - UINT32 color; /* Note: shifts must be done before sign-conversion. */ Cg = (INT16)((INT8)((*sptr++) << cll)); Co = (INT16)((INT8)((*sptr++) << cll)); diff --git a/libfreerdp/primitives/prim_YUV.c b/libfreerdp/primitives/prim_YUV.c index 7320f33..e23835b 100644 --- a/libfreerdp/primitives/prim_YUV.c +++ b/libfreerdp/primitives/prim_YUV.c @@ -23,6 +23,7 @@ #include #include #include +#include "prim_internal.h" static INLINE BYTE CLIP(INT32 X) { @@ -326,101 +327,6 @@ static INLINE BYTE YUV2B(INT32 Y, INT32 U, INT32 V) return CLIP(b8); } -static INLINE BYTE* writeYUVPixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U, - BYTE V) -{ - const BYTE r = YUV2R(Y, U, V); - const BYTE g = YUV2G(Y, U, V); - const BYTE b = YUV2B(Y, U, V); - const BYTE a = 0xFF; - dst[0] = b; - dst[1] = g; - dst[2] = r; - dst[3] = a; - return dst + formatSize; -} - -static INLINE BYTE* writeYUVPixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U, - BYTE V) -{ - const BYTE r = YUV2R(Y, U, V); - const BYTE g = YUV2G(Y, U, V); - const BYTE b = YUV2B(Y, U, V); - const BYTE a = 0xFF; - dst[0] = r; - dst[1] = g; - dst[2] = b; - dst[3] = a; - return dst + formatSize; -} - -static INLINE BYTE* writeYUVPixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U, - BYTE V) -{ - const BYTE r = YUV2R(Y, U, V); - const BYTE g = YUV2G(Y, U, V); - const BYTE b = YUV2B(Y, U, V); - const BYTE a = 0xFF; - dst[0] = a; - dst[1] = b; - dst[2] = g; - dst[3] = r; - return dst + formatSize; -} - -static INLINE BYTE* writeYUVPixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U, - BYTE V) -{ - const BYTE r = YUV2R(Y, U, V); - const BYTE g = YUV2G(Y, U, V); - const BYTE b = YUV2B(Y, U, V); - const BYTE a = 0xFF; - dst[0] = a; - dst[1] = r; - dst[2] = g; - dst[3] = b; - return dst + formatSize; -} - -static INLINE BYTE* writeYUVPixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U, - BYTE V) -{ - const BYTE r = YUV2R(Y, U, V); - const BYTE g = YUV2G(Y, U, V); - const BYTE b = YUV2B(Y, U, V); - const BYTE a = 0xFF; - UINT32 color = GetColor(format, r, g, b, a); - WriteColor(dst, format, color); - return dst + formatSize; -} - -typedef BYTE* (*fkt_writeYUVPixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE); - -static INLINE fkt_writeYUVPixel getWriteFunction(DWORD format) -{ - switch (format) - { - case PIXEL_FORMAT_ARGB32: - case PIXEL_FORMAT_XRGB32: - return writeYUVPixelXRGB; - - case PIXEL_FORMAT_ABGR32: - case PIXEL_FORMAT_XBGR32: - return writeYUVPixelXBGR; - - case PIXEL_FORMAT_RGBA32: - case PIXEL_FORMAT_RGBX32: - return writeYUVPixelRGBX; - - case PIXEL_FORMAT_BGRA32: - case PIXEL_FORMAT_BGRX32: - return writeYUVPixelBGRX; - - default: - return writeYUVPixelGeneric; - } -} - static pstatus_t general_YUV444ToRGB_8u_P3AC4R( const BYTE* pSrc[3], const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, @@ -429,7 +335,7 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R( UINT32 x, y; UINT32 nWidth, nHeight; const DWORD formatSize = GetBytesPerPixel(DstFormat); - fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat); + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); nWidth = roi->width; nHeight = roi->height; @@ -445,7 +351,10 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R( const BYTE Y = pY[x]; const INT32 U = pU[x]; const INT32 V = pV[x]; - pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V); + const BYTE r = YUV2R(Y, U, V); + const BYTE g = YUV2G(Y, U, V); + const BYTE b = YUV2B(Y, U, V); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF); } } @@ -475,7 +384,7 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R( UINT32 nWidth, nHeight; UINT32 lastRow, lastCol; const DWORD formatSize = GetBytesPerPixel(DstFormat); - fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat); + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); pY = pSrc[0]; pU = pSrc[1]; pV = pSrc[2]; @@ -497,6 +406,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R( for (x = 0; x < halfWidth;) { + BYTE r; + BYTE g; + BYTE b; + if (++x == halfWidth) lastCol <<= 1; @@ -504,13 +417,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R( V = *pV++; /* 1st pixel */ Y = *pY++; - pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V); + r = YUV2R(Y, U, V); + g = YUV2G(Y, U, V); + b = YUV2B(Y, U, V); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF); /* 2nd pixel */ if (!(lastCol & 0x02)) { Y = *pY++; - pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V); + r = YUV2R(Y, U, V); + g = YUV2G(Y, U, V); + b = YUV2B(Y, U, V); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF); } else { @@ -530,6 +449,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R( for (x = 0; x < halfWidth;) { + BYTE r; + BYTE g; + BYTE b; + if (++x == halfWidth) lastCol <<= 1; @@ -537,13 +460,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R( V = *pV++; /* 3rd pixel */ Y = *pY++; - pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V); + r = YUV2R(Y, U, V); + g = YUV2G(Y, U, V); + b = YUV2B(Y, U, V); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF); /* 4th pixel */ if (!(lastCol & 0x02)) { Y = *pY++; - pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V); + r = YUV2R(Y, U, V); + g = YUV2G(Y, U, V); + b = YUV2B(Y, U, V); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF); } else { diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c index e60a9e9..033a4d6 100644 --- a/libfreerdp/primitives/prim_colors.c +++ b/libfreerdp/primitives/prim_colors.c @@ -33,14 +33,6 @@ #endif /* !MINMAX */ /* ------------------------------------------------------------------------- */ -static INLINE BYTE* writePixel(BYTE* dst, UINT32 format, BYTE r, BYTE g, BYTE b) -{ - UINT32 color = GetColor(format, r, g, b, 0); - WriteColor(dst, format, color); - return dst + GetBytesPerPixel(format); -} - - static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( const INT16* pSrc[3], UINT32 srcStep, BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, @@ -55,6 +47,8 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( const INT16* pCr = pSrc[2]; int srcPad = (srcStep - (roi->width * 2)) / 2; int dstPad = (dstStep - (roi->width * 4)) / 4; + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); + const DWORD formatSize = GetBytesPerPixel(DstFormat); for (y = 0; y < roi->height; y++) { @@ -82,7 +76,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( else if (B > 255) B = 255; - pRGB = writePixel(pRGB, DstFormat, R, G, B); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF); pY++; pCb++; pCr++; @@ -111,6 +105,8 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R( const INT16* pCr = pSrc[2]; UINT32 srcPad = (srcStep - (roi->width * 2)) / 2; UINT32 dstPad = (dstStep - (roi->width * 4)) / 4; + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); + const DWORD formatSize = GetBytesPerPixel(DstFormat); for (y = 0; y < roi->height; y++) { @@ -138,7 +134,7 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R( else if (B > 255) B = 255; - pRGB = writePixel(pRGB, DstFormat, R, G, B); + pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF); pY++; pCb++; pCr++; @@ -303,6 +299,150 @@ static pstatus_t general_RGBToYCbCr_16s16s_P3P3( return PRIMITIVES_SUCCESS; } +static INLINE void writeScanlineGeneric(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + fkt_writePixel writePixel = getPixelWriteFunction(DstFormat); + + for (x = 0; x < width; x++) + dst = (*writePixel)(dst, formatSize, DstFormat, *r++, *g++, *b++, 0xFF); +} + +static INLINE void writeScanlineRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = R; + *dst++ = G; + *dst++ = B; + } +} + +static INLINE void writeScanlineBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = B; + *dst++ = G; + *dst++ = R; + } +} + +static INLINE void writeScanlineBGRX(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = B; + *dst++ = G; + *dst++ = R; + *dst++ = 0xFF; + } +} + +static INLINE void writeScanlineRGBX(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = R; + *dst++ = G; + *dst++ = B; + *dst++ = 0xFF; + } +} + +static INLINE void writeScanlineXBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = 0xFF; + *dst++ = B; + *dst++ = G; + *dst++ = R; + } +} + +static INLINE void writeScanlineXRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat, + const INT16* r, const INT16* g, const INT16* b, DWORD width) +{ + DWORD x; + + for (x = 0; x < width; x++) + { + const BYTE R = *r++; + const BYTE G = *g++; + const BYTE B = *b++; + *dst++ = 0xFF; + *dst++ = R; + *dst++ = G; + *dst++ = B; + } +} + +typedef void (*fkt_writeScanline)(BYTE*, DWORD, UINT32, const INT16*, + const INT16*, const INT16*, DWORD); + +static INLINE fkt_writeScanline getScanlineWriteFunction(DWORD format) +{ + switch (format) + { + case PIXEL_FORMAT_ARGB32: + case PIXEL_FORMAT_XRGB32: + return writeScanlineXRGB; + + case PIXEL_FORMAT_ABGR32: + case PIXEL_FORMAT_XBGR32: + return writeScanlineXBGR; + + case PIXEL_FORMAT_RGBA32: + case PIXEL_FORMAT_RGBX32: + return writeScanlineRGBX; + + case PIXEL_FORMAT_BGRA32: + case PIXEL_FORMAT_BGRX32: + return writeScanlineBGRX; + + case PIXEL_FORMAT_BGR24: + return writeScanlineBGR; + + case PIXEL_FORMAT_RGB24: + return writeScanlineRGB; + + default: + return writeScanlineGeneric; + } +} + /* ------------------------------------------------------------------------- */ static pstatus_t general_RGBToRGB_16s8u_P3AC4R( const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ @@ -315,20 +455,18 @@ static pstatus_t general_RGBToRGB_16s8u_P3AC4R( const INT16* r = pSrc[0]; const INT16* g = pSrc[1]; const INT16* b = pSrc[2]; - BYTE* dst = pDst; - UINT32 x, y; - UINT32 srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); - UINT32 dstbump = (dstStep - (roi->width * sizeof(UINT32))); + UINT32 y; + const DWORD srcAdd = srcStep / sizeof(INT16); + fkt_writeScanline writeScanline = getScanlineWriteFunction(DstFormat); + const DWORD formatSize = GetBytesPerPixel(DstFormat); for (y = 0; y < roi->height; ++y) { - for (x = 0; x < roi->width; ++x) - dst = writePixel(dst, DstFormat, *r++, *g++, *b++); - - dst += dstbump; - r += srcbump; - g += srcbump; - b += srcbump; + (*writeScanline)(pDst, formatSize, DstFormat, r, g, b, roi->width); + pDst += dstStep; + r += srcAdd; + g += srcAdd; + b += srcAdd; } return PRIMITIVES_SUCCESS; diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index dbffeee..879a003 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -34,6 +34,81 @@ ? _mm_lddqu_si128((__m128i *) (_ptr_)) \ : _mm_load_si128((__m128i *) (_ptr_))) +static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, + BYTE R, BYTE G, BYTE B, BYTE A) +{ + *dst++ = B; + *dst++ = G; + *dst++ = R; + *dst++ = A; + return dst; +} + +static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, + BYTE R, BYTE G, BYTE B, BYTE A) +{ + *dst++ = R; + *dst++ = G; + *dst++ = B; + *dst++ = A; + return dst; +} + +static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, + BYTE R, BYTE G, BYTE B, BYTE A) +{ + *dst++ = A; + *dst++ = B; + *dst++ = G; + *dst++ = R; + return dst; +} + +static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, + BYTE R, BYTE G, BYTE B, BYTE A) +{ + *dst++ = A; + *dst++ = R; + *dst++ = G; + *dst++ = B; + return dst; +} + +static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, + BYTE R, BYTE G, BYTE B, BYTE A) +{ + UINT32 color = GetColor(format, R, G, B, A); + WriteColor(dst, format, color); + return dst + formatSize; +} + +typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE); + +static INLINE fkt_writePixel getPixelWriteFunction(DWORD format) +{ + switch (format) + { + case PIXEL_FORMAT_ARGB32: + case PIXEL_FORMAT_XRGB32: + return writePixelXRGB; + + case PIXEL_FORMAT_ABGR32: + case PIXEL_FORMAT_XBGR32: + return writePixelXBGR; + + case PIXEL_FORMAT_RGBA32: + case PIXEL_FORMAT_RGBX32: + return writePixelRGBX; + + case PIXEL_FORMAT_BGRA32: + case PIXEL_FORMAT_BGRX32: + return writePixelBGRX; + + default: + return writePixelGeneric; + } +} + /* Function prototypes for all the init/deinit routines. */ FREERDP_LOCAL void primitives_init_copy(primitives_t* prims); FREERDP_LOCAL void primitives_init_set(primitives_t* prims); -- 2.7.4