opencl: inline the openCL program in the source code
authorDavid Fort <contact@hardening-consulting.com>
Wed, 13 Nov 2019 07:28:14 +0000 (08:28 +0100)
committerakallabeth <akallabeth@users.noreply.github.com>
Fri, 22 Nov 2019 12:21:39 +0000 (13:21 +0100)
include/freerdp/primitives.h
libfreerdp/CMakeLists.txt
libfreerdp/core/window.c
libfreerdp/primitives/prim_YUV_opencl.c
libfreerdp/primitives/prim_YUV_ssse3.c
libfreerdp/primitives/prim_internal.h
libfreerdp/primitives/primitives.c
libfreerdp/primitives/primitives.cl

index a979ace..140c90b 100644 (file)
@@ -144,7 +144,6 @@ typedef pstatus_t (*__andC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst,
 typedef pstatus_t (*__orC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len);
 typedef pstatus_t (*primitives_uninit_t)(void);
 
-
 typedef struct
 {
        /* Memory-to-memory copy routines */
@@ -208,11 +207,10 @@ extern "C"
        FREERDP_API void primitives_set_hints(primitive_hints hints);
        FREERDP_API primitive_hints primitives_get_hints(void);
        FREERDP_API primitives_t* primitives_get_generic(void);
-       FREERDP_API DWORD primitives_flags(primitives_t *p);
-       FREERDP_API BOOL primitives_init(primitives_t *p, primitive_hints hints);
+       FREERDP_API DWORD primitives_flags(primitives_tp);
+       FREERDP_API BOOL primitives_init(primitives_tp, primitive_hints hints);
        FREERDP_API void primitives_uninit();
 
-
 #ifdef __cplusplus
 }
 #endif
index eb221b9..71e4589 100644 (file)
@@ -293,13 +293,11 @@ if (WITH_NEON)
 endif()
 
 if (WITH_OPENCL)
-       freerdp_definition_add(-DOPENCL_SOURCE_PATH="${CMAKE_INSTALL_PREFIX}/${FREERDP_PLUGIN_PATH}")
        set(PRIMITIVES_OPENCL_SRCS primitives/prim_YUV_opencl.c)
 
         freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS})
         freerdp_library_add(OpenCL::OpenCL)
 
-       install(FILES primitives/primitives.cl DESTINATION ${FREERDP_PLUGIN_PATH})
 endif()
 
 set(PRIMITIVES_OPT_SRCS
index 4eea112..3e2688a 100644 (file)
@@ -567,8 +567,8 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
        if (order->fieldFlags & WINDOW_ORDER_FIELD_TITLE)
                DUMP_APPEND(buffer, bufferSize, " title");
        if (order->fieldFlags & WINDOW_ORDER_FIELD_CLIENT_AREA_OFFSET)
-               DUMP_APPEND(buffer, bufferSize, " clientOffset=(%"PRId32",%"PRId32")",
-                               state->clientOffsetX, state->clientOffsetY);
+               DUMP_APPEND(buffer, bufferSize, " clientOffset=(%" PRId32 ",%" PRId32 ")",
+                           state->clientOffsetX, state->clientOffsetY);
        if (order->fieldFlags & WINDOW_ORDER_FIELD_CLIENT_AREA_SIZE)
                DUMP_APPEND(buffer, bufferSize, " clientAreaWidth=%" PRIu32 " clientAreaHeight=%" PRIu32 "",
                            state->clientAreaWidth, state->clientAreaHeight);
@@ -585,10 +585,11 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
        if (order->fieldFlags & WINDOW_ORDER_FIELD_ROOT_PARENT)
                DUMP_APPEND(buffer, bufferSize, " rootParent=0x%" PRIx32 "", state->rootParentHandle);
        if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_OFFSET)
-               DUMP_APPEND(buffer, bufferSize, " windowOffset=(%"PRId32",%"PRId32")", state->windowOffsetX, state->windowOffsetY);
+               DUMP_APPEND(buffer, bufferSize, " windowOffset=(%" PRId32 ",%" PRId32 ")",
+                           state->windowOffsetX, state->windowOffsetY);
        if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_CLIENT_DELTA)
-               DUMP_APPEND(buffer, bufferSize, " windowClientDelta=(%"PRId32",%"PRId32")",
-                               state->windowClientDeltaX, state->windowClientDeltaY);
+               DUMP_APPEND(buffer, bufferSize, " windowClientDelta=(%" PRId32 ",%" PRId32 ")",
+                           state->windowClientDeltaX, state->windowClientDeltaY);
        if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_SIZE)
                DUMP_APPEND(buffer, bufferSize, " windowWidth=%" PRIu32 " windowHeight=%" PRIu32 "",
                            state->windowWidth, state->windowHeight);
@@ -607,8 +608,8 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
        }
 
        if (order->fieldFlags & WINDOW_ORDER_FIELD_VIS_OFFSET)
-               DUMP_APPEND(buffer, bufferSize, " visibleOffset=(%"PRId32",%"PRId32")", state->visibleOffsetX, 
-                               state->visibleOffsetY);
+               DUMP_APPEND(buffer, bufferSize, " visibleOffset=(%" PRId32 ",%" PRId32 ")",
+                           state->visibleOffsetX, state->visibleOffsetY);
 
        if (order->fieldFlags & WINDOW_ORDER_FIELD_VISIBILITY)
        {
index eefe684..f35f9f9 100644 (file)
@@ -54,12 +54,13 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
 {
        cl_int ret;
        int i;
-       cl_mem objs[3] = {NULL, NULL, NULL};
+       cl_mem objs[3] = { NULL, NULL, NULL };
        cl_mem destObj;
        cl_kernel kernel;
+       cl_event events[3];
        size_t indexes[2];
-       const char *sourceNames[] = {"Y", "U", "V"};
-       primitives_opencl_context *cl = primitives_get_opencl_context();
+       const char* sourceNames[] = { "Y", "U", "V" };
+       primitives_opencl_contextcl = primitives_get_opencl_context();
 
        kernel = clCreateKernel(cl->program, kernelName, &ret);
        if (ret != CL_SUCCESS)
@@ -70,15 +71,16 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
 
        for (i = 0; i < 3; i++)
        {
-               objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY, srcStep[i] * roi->height, NULL, &ret);
+               objs[i] =
+                   clCreateBuffer(cl->context, CL_MEM_READ_ONLY, srcStep[i] * roi->height, NULL, &ret);
                if (ret != CL_SUCCESS)
                {
                        WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
                        goto error_objs;
                }
 
-               ret = clEnqueueWriteBuffer(cl->commandQueue, objs[i], CL_TRUE, 0, srcStep[i] * roi->height,
-                               pSrc[i], 0, NULL, NULL);
+               ret = clEnqueueWriteBuffer(cl->commandQueue, objs[i], CL_FALSE, 0, srcStep[i] * roi->height,
+                                          pSrc[i], 0, NULL, &events[i]);
                if (ret != CL_SUCCESS)
                {
                        WLog_ERR(TAG, "unable to enqueue write command for %sobj", sourceNames[i]);
@@ -96,14 +98,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
        /* push source + stride arguments*/
        for (i = 0; i < 3; i++)
        {
-               ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void *)&objs[i]);
+               ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void*)&objs[i]);
                if (ret != CL_SUCCESS)
                {
                        WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
                        goto error_set_args;
                }
 
-               ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void *)&srcStep[i]);
+               ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void*)&srcStep[i]);
                if (ret != CL_SUCCESS)
                {
                        WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
@@ -111,14 +113,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
                }
        }
 
-       ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&destObj);
+       ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&destObj);
        if (ret != CL_SUCCESS)
        {
                WLog_ERR(TAG, "unable to set arg destObj");
                goto error_set_args;
        }
 
-       ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&dstStep);
+       ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void*)&dstStep);
        if (ret != CL_SUCCESS)
        {
                WLog_ERR(TAG, "unable to set arg dstStep");
@@ -127,8 +129,7 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
 
        indexes[0] = roi->width;
        indexes[1] = roi->height;
-       ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL,
-                       0, NULL, NULL);
+       ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 3, events, NULL);
        if (ret != CL_SUCCESS)
        {
                WLog_ERR(TAG, "unable to enqueue call kernel");
@@ -136,7 +137,8 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
        }
 
        /* Transfer result to host */
-       ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0, NULL, NULL);
+       ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0,
+                                 NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                WLog_ERR(TAG, "unable to read back buffer");
@@ -184,18 +186,19 @@ pstatus_t primitives_uninit_opencl(void)
        return PRIMITIVES_SUCCESS;
 }
 
+static const char* openclProgram =
+#include "primitives.cl"
+    ;
+
 BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
 {
        cl_platform_id* platform_ids = NULL;
        cl_uint ndevices, nplatforms, i;
        cl_kernel kernel;
        cl_int ret;
-       char sourcePath[1000];
 
        BOOL gotGPU = FALSE;
-       FILE* f;
        size_t programLen;
-       char* programSource;
 
        ret = clGetPlatformIDs(0, NULL, &nplatforms);
        if (ret != CL_SUCCESS || nplatforms < 1)
@@ -270,45 +273,14 @@ BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
                return FALSE;
        }
 
-       snprintf(sourcePath, sizeof(sourcePath), "%s/primitives.cl", OPENCL_SOURCE_PATH);
-
-       f = fopen(sourcePath, "r");
-       if (!f)
-       {
-               WLog_ERR(TAG, "openCL: unable to open source file %s", sourcePath);
-               goto error_source_file;
-       }
-
-       fseek(f, 0, SEEK_END);
-       programLen = ftell(f);
-       fseek(f, 0, SEEK_SET);
-
-       programSource = malloc(programLen);
-       if (!programSource)
-       {
-               WLog_ERR(TAG, "openCL: unable to allocate memory(%d bytes) for source file %s", programLen,
-                        sourcePath);
-               fclose(f);
-               goto error_source_file;
-       }
-
-       if (fread(programSource, programLen, 1, f) <= 0)
-       {
-               WLog_ERR(TAG, "openCL: unable to read openCL program in %s", sourcePath);
-               free(programSource);
-               fclose(f);
-               goto error_source_file;
-       }
-       fclose(f);
-
+       programLen = strlen(openclProgram);
        cl->program =
-           clCreateProgramWithSource(cl->context, 1, (const char**)&programSource, &programLen, &ret);
+           clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret);
        if (ret != CL_SUCCESS)
        {
-               WLog_ERR(TAG, "openCL: unable to create command queue");
+               WLog_ERR(TAG, "openCL: unable to create program");
                goto out_program_create;
        }
-       free(programSource);
 
        ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
        if (ret != CL_SUCCESS)
@@ -343,7 +315,6 @@ BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
 
 out_program_build:
        clReleaseProgram(cl->program);
-error_source_file:
 out_program_create:
        clReleaseCommandQueue(cl->commandQueue);
        clReleaseContext(cl->context);
@@ -363,11 +334,12 @@ BOOL primitives_init_opencl(primitives_t* prims)
 }
 
 static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3],
-    BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi)
+                                              BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
+                                              const prim_size_t* roi)
 {
-       const char *kernel_name;
+       const charkernel_name;
 
-       switch(DstFormat)
+       switch (DstFormat)
        {
                case PIXEL_FORMAT_BGRA32:
                case PIXEL_FORMAT_BGRX32:
@@ -394,7 +366,4 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32
 void primitives_init_YUV_opencl(primitives_t* prims)
 {
        prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
-
 }
-
-
index beda11d..8dc39dc 100644 (file)
@@ -1448,7 +1448,6 @@ static pstatus_t ssse3_YUV420CombineToYUV444(avc444_frame_type type, const BYTE*
        }
 }
 
-
 void primitives_init_YUV_opt(primitives_t* prims)
 {
        generic = primitives_get_generic();
index fb119cf..a13b5b1 100644 (file)
@@ -40,7 +40,6 @@
 #define HAVE_CPU_OPTIMIZED_PRIMITIVES 1
 #endif
 
-
 #if defined(WITH_SSE2)
 /* Use lddqu for unaligned; load for 16-byte aligned. */
 #define LOAD_SI128(_ptr_)                                           \
index a9fa6f8..96ffd38 100644 (file)
@@ -46,7 +46,6 @@ primitive_hints primitives_get_hints(void)
        return primitivesHints;
 }
 
-
 /* Singleton pointer used throughout the program when requested. */
 static primitives_t pPrimitivesGeneric = { 0 };
 static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
@@ -62,14 +61,12 @@ static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
 
 #endif
 
-#if defined(HAVE_OPTIMIZED_PRIMITIVES)
 static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
-#endif
 
 static primitives_t pPrimitives = { 0 };
 
 /* ------------------------------------------------------------------------- */
-static BOOL primitives_init_generic(primitives_t *prims)
+static BOOL primitives_init_generic(primitives_tprims)
 {
        primitives_init_add(prims);
        primitives_init_andor(prims);
@@ -93,7 +90,7 @@ static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PV
        return primitives_init_generic(&pPrimitivesGeneric);
 }
 
-static BOOL primitives_init_optimized(primitives_t *prims)
+static BOOL primitives_init_optimized(primitives_tprims)
 {
        primitives_init_generic(prims);
 
@@ -113,11 +110,12 @@ static BOOL primitives_init_optimized(primitives_t *prims)
        return TRUE;
 }
 
-typedef struct {
-       BYTE *channels[3];
+typedef struct
+{
+       BYTE* channels[3];
        UINT32 steps[3];
        prim_size_t roi;
-       BYTE *outputBuffer;
+       BYTEoutputBuffer;
        UINT32 outputStride;
        UINT32 testedFormat;
 } primitives_YUV_benchmark;
@@ -155,7 +153,7 @@ static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_be
 
        for (i = 0; i < 3; i++)
        {
-               BYTE *buf = ret->channels[i] = malloc(roi->width * roi->height);
+               BYTEbuf = ret->channels[i] = malloc(roi->width * roi->height);
                if (!buf)
                        goto fail;
 
@@ -170,11 +168,11 @@ fail:
        return ret;
 }
 
-static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primitives_t *prims,
-               UINT64 runTime, UINT32 *computations)
+static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
+                                         UINT64 runTime, UINT32* computations)
 {
-       ULONGLONG dueDate = GetTickCount64() + runTime;
-       const BYTE *channels[3];
+       ULONGLONG dueDate;
+       const BYTEchannels[3];
        int i;
 
        *computations = 0;
@@ -182,10 +180,20 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primit
        for (i = 0; i < 3; i++)
                channels[i] = bench->channels[i];
 
+       /* do a first dry run to initialize cache and such */
+       pstatus_t status =
+           prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
+                                        bench->outputStride, bench->testedFormat, &bench->roi);
+       if (status != PRIMITIVES_SUCCESS)
+               return FALSE;
+
+       /* let's run the benchmark */
+       dueDate = GetTickCount64() + runTime;
        while (GetTickCount64() < dueDate)
        {
-               pstatus_t status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
-                               bench->outputStride, bench->testedFormat, &bench->roi);
+               pstatus_t status =
+                   prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
+                                                bench->outputStride, bench->testedFormat, &bench->roi);
                if (status != PRIMITIVES_SUCCESS)
                        return FALSE;
                *computations = *computations + 1;
@@ -193,10 +201,10 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primit
        return TRUE;
 }
 
-static BOOL primitives_autodetect_best(primitives_t *prims)
+static BOOL primitives_autodetect_best(primitives_tprims)
 {
        BOOL ret = FALSE;
-       UINT64 benchDuration = 150; // 100 ms
+       UINT64 benchDuration = 150; /* 150 ms */
        UINT32 genericCount = 0;
        UINT32 bestCount;
        primitives_t* genericPrims = primitives_get_generic();
@@ -258,25 +266,14 @@ static BOOL primitives_autodetect_best(primitives_t *prims)
        }
 #endif
 
-       WLog_DBG(TAG,
-                "benchmark result: generic=%" PRIu32
+       WLog_DBG(TAG, "primitives benchmark result:");
+       WLog_DBG(TAG, " * generic=%" PRIu32, genericCount);
 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
-                " optimized=%" PRIu32
+       WLog_DBG(TAG, " * optimized=%" PRIu32, optimizedCount);
 #endif
 #if defined(WITH_OPENCL)
-                " openCL=%" PRIu32
-#endif
-                ,
-                genericCount
-#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
-                ,
-                optimizedCount
+       WLog_DBG(TAG, " * openCL=%" PRIu32, openclCount);
 #endif
-#if defined(WITH_OPENCL)
-                ,
-                openclCount
-#endif
-       );
        WLog_INFO(TAG, "primitives autodetect, using %s", primName);
        ret = TRUE;
 out:
@@ -291,6 +288,9 @@ static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID*
        WINPR_UNUSED(param);
        WINPR_UNUSED(context);
 
+       if (!primitives_init_optimized(&pPrimitivesGpu))
+               return FALSE;
+
        if (!primitives_init_opencl(&pPrimitivesGpu))
                return FALSE;
 
@@ -305,10 +305,8 @@ static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID*
        WINPR_UNUSED(param);
        WINPR_UNUSED(context);
 
-       if (!primitives_init_optimized(&pPrimitivesCpu))
-               return FALSE;
 
-       return TRUE;
+       return primitives_init_optimized(&pPrimitivesCpu);
 }
 #endif
 
@@ -323,22 +321,26 @@ static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID
 
 BOOL primitives_init(primitives_t* p, primitive_hints hints)
 {
-       switch(hints)
+       switch (hints)
        {
                case PRIMITIVES_AUTODETECT:
                        return primitives_autodetect_best(p);
                case PRIMITIVES_PURE_SOFT:
                        *p = pPrimitivesGeneric;
                        return TRUE;
-#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
                case PRIMITIVES_ONLY_CPU:
+#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
                        *p = pPrimitivesCpu;
-                       return TRUE;
+#else
+                       *p = pPrimitivesGeneric;
 #endif
-#if defined(WITH_OPENCL)
+                       return TRUE;
                case PRIMITIVES_ONLY_GPU:
+#if defined(WITH_OPENCL)
                        *p = pPrimitivesGpu;
                        return TRUE;
+#else
+                       return FALSE;
 #endif
                default:
                        WLog_ERR(TAG, "unknown hint %d", hints);
@@ -367,7 +369,7 @@ static void setup(void)
 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
        InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
 #endif
-#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
+#if defined(WITH_OPENCL)
        InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
 #endif
        InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
@@ -388,18 +390,23 @@ primitives_t* primitives_get_generic(void)
 primitives_t* primitives_get_by_type(DWORD type)
 {
        InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
+
        switch (type)
        {
-#if defined(WITH_OPENCL)
                case PRIMITIVES_ONLY_GPU:
-                       InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
-                       return &pPrimitivesGpu;
+#if defined(WITH_OPENCL)
+                       if (InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
+                               return &pPrimitivesGpu;
 #endif
-#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
+                       return NULL;
+
                case PRIMITIVES_ONLY_CPU:
-                       InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
-                       return &pPrimitivesCpu;
+#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
+                       if (InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
+                               return &pPrimitivesCpu;
 #endif
+                       return NULL;
+
                case PRIMITIVES_PURE_SOFT:
                default:
                        return &pPrimitivesGeneric;
index 151bc37..c1b6e7d 100644 (file)
@@ -1,22 +1,24 @@
 /**
  * FreeRDP: A Remote Desktop Protocol Implementation
  * Optimized operations using openCL
+ * vi:ts=4 sw=4
  *
  * Copyright 2019 David Fort <contact@hardening-consulting.com>
+ * Copyright 2019 Rangee Gmbh
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License. You may obtain
+ * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
  */
 
+#define STRINGIFY(x) #x
+
+STRINGIFY(
 unsigned char clamp_uc(int v, int l, int h)
 {
     if (v > h)
@@ -79,4 +81,4 @@ __kernel void yuv420_to_bgra_1b(
        destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255);         /* R */
        destPtr[3] = 0xff; /* A */
 }
-
+)