return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
}
+static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3],
+ BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
+ const prim_size_t* roi)
+{
+ const char* kernel_name;
+
+ switch (DstFormat)
+ {
+ case PIXEL_FORMAT_BGRA32:
+ case PIXEL_FORMAT_BGRX32:
+ kernel_name = "yuv444_to_bgra_1b";
+ break;
+ case PIXEL_FORMAT_XRGB32:
+ case PIXEL_FORMAT_ARGB32:
+ kernel_name = "yuv444_to_argb_1b";
+ break;
+ default:
+ {
+ primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
+ if (!p)
+ return -1;
+ return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+ }
+ }
+
+ return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
+}
+
BOOL primitives_init_opencl(primitives_t* prims)
{
primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
return FALSE;
prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
+ prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
prims->uninit = primitives_uninit_opencl;
return TRUE;
#define STRINGIFY(x) #x
STRINGIFY(
-unsigned char clamp_uc(int v, int l, int h)
+uchar clamp_uc(int v, short l, short h)
{
if (v > h)
v = h;
if (v < l)
v = l;
- return (unsigned char)v;
+ return (uchar)v;
}
__kernel void yuv420_to_argb_1b(
- __global unsigned char *bufY, int strideY,
- __global unsigned char *bufU, int strideU,
- __global unsigned char *bufV, int strideV,
- __global unsigned char *dest, int strideDest)
+ __global const uchar *bufY, int strideY,
+ __global const uchar *bufU, int strideU,
+ __global const uchar *bufV, int strideV,
+ __global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
- int Y = bufY[y * strideY + x];
- int Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
- int Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
+ short Y = bufY[y * strideY + x];
+ short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
- __global unsigned char *destPtr = dest + (strideDest * y) + (x * 4);
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
}
__kernel void yuv420_to_bgra_1b(
- __global unsigned char *bufY, int strideY,
- __global unsigned char *bufU, int strideU,
- __global unsigned char *bufV, int strideV,
- __global unsigned char *dest, int strideDest)
+ __global const uchar *bufY, int strideY,
+ __global const uchar *bufU, int strideU,
+ __global const uchar *bufV, int strideV,
+ __global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
- int Y = bufY[y * strideY + x];
- int U = bufU[(y / 2) * strideU + (x / 2)] - 128;
- int V = bufV[(y / 2) * strideV + (x / 2)] - 128;
+ short Y = bufY[y * strideY + x];
+ short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
+ short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
- __global unsigned char *destPtr = dest + (strideDest * y) + (x * 4);
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
}
+
+__kernel void yuv444_to_bgra_1b(
+ __global const uchar *bufY, int strideY,
+ __global const uchar *bufU, int strideU,
+ __global const uchar *bufV, int strideV,
+ __global uchar *dest, int strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[3] = 0xff; /* A */
+}
+
+__kernel void yuv444_to_argb_1b(
+ __global const uchar *bufY, int strideY,
+ __global const uchar *bufU, int strideU,
+ __global const uchar *bufV, int strideV,
+ __global uchar *dest, int strideDest)
+{
+ unsigned int x = get_global_id(0);
+ unsigned int y = get_global_id(1);
+
+ short Y = bufY[y * strideY + x];
+ short U = bufU[y * strideU + x] - 128;
+ short V = bufV[y * strideV + x] - 128;
+
+ __global uchar *destPtr = dest + (strideDest * y) + (x * 4);
+
+ /**
+ * | R | ( | 256 0 403 | | Y | )
+ * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
+ * | B | ( | 256 475 0 | | V - 128 | )
+ */
+ int y256 = 256 * Y;
+ destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
+ destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
+ destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
+ destPtr[0] = 0xff; /* A */
+}
)