Added optimized pixel write functions for colors.
author Armin Novak <armin.novak@thincast.com>
Fri, 25 Nov 2016 11:00:41 +0000 (12:00 +0100)
committer Armin Novak <armin.novak@thincast.com>
Mon, 28 Nov 2016 08:21:12 +0000 (09:21 +0100)
libfreerdp/primitives/prim_YCoCg.c
libfreerdp/primitives/prim_YUV.c
libfreerdp/primitives/prim_colors.c
libfreerdp/primitives/prim_internal.h

index 33ca3a1..f45477a 100644 (file)
 #endif /* !MINMAX */
 
 /* ------------------------------------------------------------------------- */
-static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format,
-                                   BYTE R, BYTE G, BYTE B, BYTE A)
-{
-       dst[0] = B;
-       dst[1] = G;
-       dst[2] = R;
-       dst[3] = A;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format,
-                                   BYTE R, BYTE G, BYTE B, BYTE A)
-{
-       dst[0] = R;
-       dst[1] = G;
-       dst[2] = B;
-       dst[3] = A;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format,
-                                   BYTE R, BYTE G, BYTE B, BYTE A)
-{
-       dst[0] = A;
-       dst[1] = B;
-       dst[2] = G;
-       dst[3] = R;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format,
-                                   BYTE R, BYTE G, BYTE B, BYTE A)
-{
-       dst[0] = A;
-       dst[1] = R;
-       dst[2] = G;
-       dst[3] = B;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format,
-                                      BYTE R, BYTE G, BYTE B, BYTE A)
-{
-       UINT32 color = GetColor(format, R, G, B, A);
-       WriteColor(dst, format, color);
-       return dst + formatSize;
-}
-
-typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE);
-
-static INLINE fkt_writePixel getWriteFunction(DWORD format)
-{
-       switch (format)
-       {
-               case PIXEL_FORMAT_ARGB32:
-               case PIXEL_FORMAT_XRGB32:
-                       return writePixelXRGB;
-
-               case PIXEL_FORMAT_ABGR32:
-               case PIXEL_FORMAT_XBGR32:
-                       return writePixelXBGR;
-
-               case PIXEL_FORMAT_RGBA32:
-               case PIXEL_FORMAT_RGBX32:
-                       return writePixelRGBX;
-
-               case PIXEL_FORMAT_BGRA32:
-               case PIXEL_FORMAT_BGRX32:
-                       return writePixelBGRX;
-
-               default:
-                       return writePixelGeneric;
-       }
-}
-
 static pstatus_t general_YCoCgToRGB_8u_AC4R(
     const BYTE* pSrc, INT32 srcStep,
     BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
@@ -120,7 +45,7 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R(
        const BYTE* sptr = pSrc;
        INT16 Cg, Co, Y, T, R, G, B;
        const DWORD formatSize = GetBytesPerPixel(DstFormat);
-       fkt_writePixel writePixel = getWriteFunction(DstFormat);
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
        int cll = shift - 1;  /* -1 builds in the /2's */
        UINT32 srcPad = srcStep - (width * 4);
        UINT32 dstPad = dstStep - (width * formatSize);
@@ -129,7 +54,6 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R(
        {
                for (x = 0; x < width; x++)
                {
-                       UINT32 color;
                        /* Note: shifts must be done before sign-conversion. */
                        Cg = (INT16)((INT8)((*sptr++) << cll));
                        Co = (INT16)((INT8)((*sptr++) << cll));
index 7320f33..e23835b 100644 (file)
@@ -23,6 +23,7 @@
 #include <freerdp/types.h>
 #include <freerdp/primitives.h>
 #include <freerdp/codec/color.h>
+#include "prim_internal.h"
 
 static INLINE BYTE CLIP(INT32 X)
 {
@@ -326,101 +327,6 @@ static INLINE BYTE YUV2B(INT32 Y, INT32 U, INT32 V)
        return CLIP(b8);
 }
 
-static INLINE BYTE* writeYUVPixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
-                                      BYTE V)
-{
-       const BYTE r = YUV2R(Y, U, V);
-       const BYTE g = YUV2G(Y, U, V);
-       const BYTE b = YUV2B(Y, U, V);
-       const BYTE a = 0xFF;
-       dst[0] = b;
-       dst[1] = g;
-       dst[2] = r;
-       dst[3] = a;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writeYUVPixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
-                                      BYTE V)
-{
-       const BYTE r = YUV2R(Y, U, V);
-       const BYTE g = YUV2G(Y, U, V);
-       const BYTE b = YUV2B(Y, U, V);
-       const BYTE a = 0xFF;
-       dst[0] = r;
-       dst[1] = g;
-       dst[2] = b;
-       dst[3] = a;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writeYUVPixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
-                                      BYTE V)
-{
-       const BYTE r = YUV2R(Y, U, V);
-       const BYTE g = YUV2G(Y, U, V);
-       const BYTE b = YUV2B(Y, U, V);
-       const BYTE a = 0xFF;
-       dst[0] = a;
-       dst[1] = b;
-       dst[2] = g;
-       dst[3] = r;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writeYUVPixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
-                                      BYTE V)
-{
-       const BYTE r = YUV2R(Y, U, V);
-       const BYTE g = YUV2G(Y, U, V);
-       const BYTE b = YUV2B(Y, U, V);
-       const BYTE a = 0xFF;
-       dst[0] = a;
-       dst[1] = r;
-       dst[2] = g;
-       dst[3] = b;
-       return dst + formatSize;
-}
-
-static INLINE BYTE* writeYUVPixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
-        BYTE V)
-{
-       const BYTE r = YUV2R(Y, U, V);
-       const BYTE g = YUV2G(Y, U, V);
-       const BYTE b = YUV2B(Y, U, V);
-       const BYTE a = 0xFF;
-       UINT32 color = GetColor(format, r, g, b, a);
-       WriteColor(dst, format, color);
-       return dst + formatSize;
-}
-
-typedef BYTE* (*fkt_writeYUVPixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE);
-
-static INLINE fkt_writeYUVPixel getWriteFunction(DWORD format)
-{
-       switch (format)
-       {
-               case PIXEL_FORMAT_ARGB32:
-               case PIXEL_FORMAT_XRGB32:
-                       return writeYUVPixelXRGB;
-
-               case PIXEL_FORMAT_ABGR32:
-               case PIXEL_FORMAT_XBGR32:
-                       return writeYUVPixelXBGR;
-
-               case PIXEL_FORMAT_RGBA32:
-               case PIXEL_FORMAT_RGBX32:
-                       return writeYUVPixelRGBX;
-
-               case PIXEL_FORMAT_BGRA32:
-               case PIXEL_FORMAT_BGRX32:
-                       return writeYUVPixelBGRX;
-
-               default:
-                       return writeYUVPixelGeneric;
-       }
-}
-
 static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
     const BYTE* pSrc[3], const UINT32 srcStep[3],
     BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
@@ -429,7 +335,7 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
        UINT32 x, y;
        UINT32 nWidth, nHeight;
        const DWORD formatSize = GetBytesPerPixel(DstFormat);
-       fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat);
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
        nWidth = roi->width;
        nHeight = roi->height;
 
@@ -445,7 +351,10 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
                        const BYTE Y = pY[x];
                        const INT32 U = pU[x];
                        const INT32 V = pV[x];
-                       pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
+                       const BYTE r = YUV2R(Y, U, V);
+                       const BYTE g = YUV2G(Y, U, V);
+                       const BYTE b = YUV2B(Y, U, V);
+                       pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
                }
        }
 
@@ -475,7 +384,7 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
        UINT32 nWidth, nHeight;
        UINT32 lastRow, lastCol;
        const DWORD formatSize = GetBytesPerPixel(DstFormat);
-       fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat);
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
        pY = pSrc[0];
        pU = pSrc[1];
        pV = pSrc[2];
@@ -497,6 +406,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
 
                for (x = 0; x < halfWidth;)
                {
+                       BYTE r;
+                       BYTE g;
+                       BYTE b;
+
                        if (++x == halfWidth)
                                lastCol <<= 1;
 
@@ -504,13 +417,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
                        V = *pV++;
                        /* 1st pixel */
                        Y = *pY++;
-                       pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
+                       r = YUV2R(Y, U, V);
+                       g = YUV2G(Y, U, V);
+                       b = YUV2B(Y, U, V);
+                       pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
 
                        /* 2nd pixel */
                        if (!(lastCol & 0x02))
                        {
                                Y = *pY++;
-                               pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
+                               r = YUV2R(Y, U, V);
+                               g = YUV2G(Y, U, V);
+                               b = YUV2B(Y, U, V);
+                               pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
                        }
                        else
                        {
@@ -530,6 +449,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
 
                for (x = 0; x < halfWidth;)
                {
+                       BYTE r;
+                       BYTE g;
+                       BYTE b;
+
                        if (++x == halfWidth)
                                lastCol <<= 1;
 
@@ -537,13 +460,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
                        V = *pV++;
                        /* 3rd pixel */
                        Y = *pY++;
-                       pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
+                       r = YUV2R(Y, U, V);
+                       g = YUV2G(Y, U, V);
+                       b = YUV2B(Y, U, V);
+                       pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
 
                        /* 4th pixel */
                        if (!(lastCol & 0x02))
                        {
                                Y = *pY++;
-                               pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
+                               r = YUV2R(Y, U, V);
+                               g = YUV2G(Y, U, V);
+                               b = YUV2B(Y, U, V);
+                               pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
                        }
                        else
                        {
index e60a9e9..033a4d6 100644 (file)
 #endif /* !MINMAX */
 
 /* ------------------------------------------------------------------------- */
-static INLINE BYTE* writePixel(BYTE* dst, UINT32 format, BYTE r, BYTE g, BYTE b)
-{
-       UINT32 color = GetColor(format, r, g, b, 0);
-       WriteColor(dst, format, color);
-       return dst + GetBytesPerPixel(format);
-}
-
-
 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
     const INT16* pSrc[3], UINT32 srcStep,
     BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
@@ -55,6 +47,8 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
        const INT16* pCr = pSrc[2];
        int srcPad = (srcStep - (roi->width * 2)) / 2;
        int dstPad = (dstStep - (roi->width * 4)) / 4;
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
+       const DWORD formatSize = GetBytesPerPixel(DstFormat);
 
        for (y = 0; y < roi->height; y++)
        {
@@ -82,7 +76,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
                        else if (B > 255)
                                B = 255;
 
-                       pRGB = writePixel(pRGB, DstFormat, R, G, B);
+                       pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
                        pY++;
                        pCb++;
                        pCr++;
@@ -111,6 +105,8 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R(
        const INT16* pCr = pSrc[2];
        UINT32 srcPad = (srcStep - (roi->width * 2)) / 2;
        UINT32 dstPad = (dstStep - (roi->width * 4)) / 4;
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
+       const DWORD formatSize = GetBytesPerPixel(DstFormat);
 
        for (y = 0; y < roi->height; y++)
        {
@@ -138,7 +134,7 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R(
                        else if (B > 255)
                                B = 255;
 
-                       pRGB = writePixel(pRGB, DstFormat, R, G, B);
+                       pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
                        pY++;
                        pCb++;
                        pCr++;
@@ -303,6 +299,150 @@ static pstatus_t general_RGBToYCbCr_16s16s_P3P3(
        return PRIMITIVES_SUCCESS;
 }
 
+static INLINE void writeScanlineGeneric(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                        const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+       fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
+
+       for (x = 0; x < width; x++)
+               dst = (*writePixel)(dst, formatSize, DstFormat, *r++, *g++, *b++, 0xFF);
+}
+
+static INLINE void writeScanlineRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                    const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = R;
+               *dst++ = G;
+               *dst++ = B;
+       }
+}
+
+static INLINE void writeScanlineBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                    const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = B;
+               *dst++ = G;
+               *dst++ = R;
+       }
+}
+
+static INLINE void writeScanlineBGRX(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = B;
+               *dst++ = G;
+               *dst++ = R;
+               *dst++ = 0xFF;
+       }
+}
+
+static INLINE void writeScanlineRGBX(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = R;
+               *dst++ = G;
+               *dst++ = B;
+               *dst++ = 0xFF;
+       }
+}
+
+static INLINE void writeScanlineXBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = 0xFF;
+               *dst++ = B;
+               *dst++ = G;
+               *dst++ = R;
+       }
+}
+
+static INLINE void writeScanlineXRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
+                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
+{
+       DWORD x;
+
+       for (x = 0; x < width; x++)
+       {
+               const BYTE R = *r++;
+               const BYTE G = *g++;
+               const BYTE B = *b++;
+               *dst++ = 0xFF;
+               *dst++ = R;
+               *dst++ = G;
+               *dst++ = B;
+       }
+}
+
+typedef void (*fkt_writeScanline)(BYTE*, DWORD, UINT32, const INT16*,
+                                  const INT16*, const INT16*, DWORD);
+
+static INLINE fkt_writeScanline getScanlineWriteFunction(DWORD format)
+{
+       switch (format)
+       {
+               case PIXEL_FORMAT_ARGB32:
+               case PIXEL_FORMAT_XRGB32:
+                       return writeScanlineXRGB;
+
+               case PIXEL_FORMAT_ABGR32:
+               case PIXEL_FORMAT_XBGR32:
+                       return writeScanlineXBGR;
+
+               case PIXEL_FORMAT_RGBA32:
+               case PIXEL_FORMAT_RGBX32:
+                       return writeScanlineRGBX;
+
+               case PIXEL_FORMAT_BGRA32:
+               case PIXEL_FORMAT_BGRX32:
+                       return writeScanlineBGRX;
+
+               case PIXEL_FORMAT_BGR24:
+                       return writeScanlineBGR;
+
+               case PIXEL_FORMAT_RGB24:
+                       return writeScanlineRGB;
+
+               default:
+                       return writeScanlineGeneric;
+       }
+}
+
 /* ------------------------------------------------------------------------- */
 static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
     const INT16* const pSrc[3],        /* 16-bit R,G, and B arrays */
@@ -315,20 +455,18 @@ static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
        const INT16* r  = pSrc[0];
        const INT16* g  = pSrc[1];
        const INT16* b  = pSrc[2];
-       BYTE* dst = pDst;
-       UINT32 x, y;
-       UINT32 srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
-       UINT32 dstbump = (dstStep - (roi->width * sizeof(UINT32)));
+       UINT32 y;
+       const DWORD srcAdd = srcStep / sizeof(INT16);
+       fkt_writeScanline writeScanline = getScanlineWriteFunction(DstFormat);
+       const DWORD formatSize = GetBytesPerPixel(DstFormat);
 
        for (y = 0; y < roi->height; ++y)
        {
-               for (x = 0; x < roi->width; ++x)
-                       dst = writePixel(dst, DstFormat, *r++, *g++, *b++);
-
-               dst += dstbump;
-               r += srcbump;
-               g += srcbump;
-               b += srcbump;
+               (*writeScanline)(pDst, formatSize, DstFormat, r, g, b, roi->width);
+               pDst += dstStep;
+               r += srcAdd;
+               g += srcAdd;
+               b += srcAdd;
        }
 
        return PRIMITIVES_SUCCESS;
index dbffeee..879a003 100644 (file)
         ? _mm_lddqu_si128((__m128i *) (_ptr_)) \
         : _mm_load_si128((__m128i *) (_ptr_)))
 
+static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format,
+                                   BYTE R, BYTE G, BYTE B, BYTE A)
+{
+       *dst++ = B;
+       *dst++ = G;
+       *dst++ = R;
+       *dst++ = A;
+       return dst;
+}
+
+static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format,
+                                   BYTE R, BYTE G, BYTE B, BYTE A)
+{
+       *dst++ = R;
+       *dst++ = G;
+       *dst++ = B;
+       *dst++ = A;
+       return dst;
+}
+
+static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format,
+                                   BYTE R, BYTE G, BYTE B, BYTE A)
+{
+       *dst++ = A;
+       *dst++ = B;
+       *dst++ = G;
+       *dst++ = R;
+       return dst;
+}
+
+static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format,
+                                   BYTE R, BYTE G, BYTE B, BYTE A)
+{
+       *dst++ = A;
+       *dst++ = R;
+       *dst++ = G;
+       *dst++ = B;
+       return dst;
+}
+
+static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format,
+                                      BYTE R, BYTE G, BYTE B, BYTE A)
+{
+       UINT32 color = GetColor(format, R, G, B, A);
+       WriteColor(dst, format, color);
+       return dst + formatSize;
+}
+
+typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE);
+
+static INLINE fkt_writePixel getPixelWriteFunction(DWORD format)
+{
+       switch (format)
+       {
+           case PIXEL_FORMAT_ARGB32:
+           case PIXEL_FORMAT_XRGB32:
+                   return writePixelXRGB;
+
+           case PIXEL_FORMAT_ABGR32:
+           case PIXEL_FORMAT_XBGR32:
+                   return writePixelXBGR;
+
+           case PIXEL_FORMAT_RGBA32:
+           case PIXEL_FORMAT_RGBX32:
+                   return writePixelRGBX;
+
+           case PIXEL_FORMAT_BGRA32:
+           case PIXEL_FORMAT_BGRX32:
+                   return writePixelBGRX;
+
+           default:
+                   return writePixelGeneric;
+       }
+}
+
 /* Function prototypes for all the init/deinit routines. */
 FREERDP_LOCAL void primitives_init_copy(primitives_t* prims);
 FREERDP_LOCAL void primitives_init_set(primitives_t* prims);