[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / Initialize / b3OpenCLUtils.cpp
1 /*
2 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
3 Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15
16 //Original author: Roman Ponomarev
17 //Mostly Reimplemented by Erwin Coumans
18
// Debug switches. When either one is true, b3OpenCLUtils_compileCLProgramFromString
// skips the lookup of a previously cached program binary (both flags are tested
// in the condition that guards that lookup).
19 bool gDebugForceLoadingFromSource = false;
20 bool gDebugSkipLoadingBinary = false;
21
22 #include "Bullet3Common/b3Logging.h"
23
24 #include <string.h>
25
26 #ifdef _WIN32
27 #pragma warning(disable : 4996)
28 #endif
29 #include "b3OpenCLUtils.h"
30 //#include "b3OpenCLInclude.h"
31
32 #include <stdio.h>
33 #include <stdlib.h>
34
35 #define B3_MAX_CL_DEVICES 16  //who needs 16 devices?
36
37 #ifdef _WIN32
38 #include <windows.h>
39 #endif
40
41 #include <assert.h>
42 #define b3Assert assert
43 #ifndef _WIN32
44 #include <sys/stat.h>
45
46 #endif
47
// Directory prefix used when composing the cached program binary file name
// ("<path>/<kernel>.<device>.<driver>.bin"). Defaults to "cache" and can be
// replaced through b3OpenCLUtils_setCachePath.
48 static const char* sCachedBinaryPath = "cache";
49
50 //Set the preferred platform vendor using the OpenCL SDK
// Vendor string chosen at compile time from the CL_PLATFORM_* / B3_USE_CLEW
// macros. It is returned by b3OpenCLUtils_getSdkVendorName and compared with
// each platform's CL_PLATFORM_VENDOR string in b3OpenCLUtils_createContextFromType
// so that a matching platform is tried first.
51 static const char* spPlatformVendor =
52 #if defined(CL_PLATFORM_MINI_CL)
53         "MiniCL, SCEA";
54 #elif defined(CL_PLATFORM_AMD)
55         "Advanced Micro Devices, Inc.";
56 #elif defined(CL_PLATFORM_NVIDIA)
57         "NVIDIA Corporation";
58 #elif defined(CL_PLATFORM_INTEL)
59         "Intel(R) Corporation";
60 #elif defined(B3_USE_CLEW)
61         "clew (OpenCL Extension Wrangler library)";
62 #else
63         "Unknown Vendor";
64 #endif
65
66 #ifndef CL_PLATFORM_MINI_CL
67 #ifdef _WIN32
68 #ifndef B3_USE_CLEW
69 #include "CL/cl_gl.h"
70 #endif  //B3_USE_CLEW
71 #endif  //_WIN32
72 #endif
73
74 void MyFatalBreakAPPLE(const char* errstr,
75                                            const void* private_info,
76                                            size_t cb,
77                                            void* user_data)
78 {
79         const char* patloc = strstr(errstr, "Warning");
80         //find out if it is a warning or error, exit if error
81
82         if (patloc)
83         {
84                 b3Warning("Warning: %s\n", errstr);
85         }
86         else
87         {
88                 b3Error("Error: %s\n", errstr);
89                 b3Assert(0);
90         }
91 }
92
93 #ifdef B3_USE_CLEW
94
95 int b3OpenCLUtils_clewInit()
96 {
97         int result = -1;
98
99 #ifdef _WIN32
100         const char* cl = "OpenCL.dll";
101 #elif defined __APPLE__
102         const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
103 #else  //presumable Linux? \
104            //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so
105         const char* cl = "libOpenCL.so.1";
106         result = clewInit(cl);
107         if (result != CLEW_SUCCESS)
108         {
109                 cl = "libOpenCL.so";
110         }
111         else
112         {
113                 clewExit();
114         }
115 #endif
116         result = clewInit(cl);
117         if (result != CLEW_SUCCESS)
118         {
119                 b3Error("clewInit failed with error code %d\n", result);
120         }
121         else
122         {
123                 b3Printf("clewInit succesfull using %s\n", cl);
124         }
125         return result;
126 }
127 #endif
128
129 int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
130 {
131 #ifdef B3_USE_CLEW
132         b3OpenCLUtils_clewInit();
133 #endif
134
135         cl_platform_id pPlatforms[10] = {0};
136
137         cl_uint numPlatforms = 0;
138         cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms);
139         //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
140
141         if (ciErrNum != CL_SUCCESS)
142         {
143                 if (pErrNum != NULL)
144                         *pErrNum = ciErrNum;
145         }
146         return numPlatforms;
147 }
148
149 const char* b3OpenCLUtils_getSdkVendorName()
150 {
151         return spPlatformVendor;
152 }
153
154 void b3OpenCLUtils_setCachePath(const char* path)
155 {
156         sCachedBinaryPath = path;
157 }
158
159 cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
160 {
161 #ifdef B3_USE_CLEW
162         b3OpenCLUtils_clewInit();
163 #endif
164
165         cl_platform_id platform = 0;
166         unsigned int platformIndex = (unsigned int)platformIndex0;
167         cl_uint numPlatforms;
168         cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
169
170         if (platformIndex < numPlatforms)
171         {
172                 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
173                 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
174                 if (ciErrNum != CL_SUCCESS)
175                 {
176                         if (pErrNum != NULL)
177                                 *pErrNum = ciErrNum;
178                         return platform;
179                 }
180
181                 platform = platforms[platformIndex];
182
183                 free(platforms);
184         }
185
186         return platform;
187 }
188
189 void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
190 {
191         b3Assert(platform);
192         cl_int ciErrNum;
193         ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL);
194         oclCHECKERROR(ciErrNum, CL_SUCCESS);
195         ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL);
196         oclCHECKERROR(ciErrNum, CL_SUCCESS);
197         ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL);
198         oclCHECKERROR(ciErrNum, CL_SUCCESS);
199 }
200
201 void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
202 {
203         b3OpenCLPlatformInfo platformInfo;
204         b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
205         b3Printf("Platform info:\n");
206         b3Printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor);
207         b3Printf("  CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName);
208         b3Printf("  CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion);
209 }
210
211 cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
212 {
213         cl_context retContext = 0;
214         cl_int ciErrNum = 0;
215         cl_uint num_entries;
216         cl_device_id devices[B3_MAX_CL_DEVICES];
217         cl_uint num_devices;
218         cl_context_properties* cprops;
219
220         /*
221         * If we could find our platform, use it. Otherwise pass a NULL and get whatever the
222         * implementation thinks we should be using.
223         */
224         cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0};
225         cps[0] = CL_CONTEXT_PLATFORM;
226         cps[1] = (cl_context_properties)platform;
227 #ifdef _WIN32
228 #ifndef B3_USE_CLEW
229         if (pGLContext && pGLDC)
230         {
231                 cps[2] = CL_GL_CONTEXT_KHR;
232                 cps[3] = (cl_context_properties)pGLContext;
233                 cps[4] = CL_WGL_HDC_KHR;
234                 cps[5] = (cl_context_properties)pGLDC;
235         }
236 #endif  //B3_USE_CLEW
237 #endif  //_WIN32
238         num_entries = B3_MAX_CL_DEVICES;
239
240         num_devices = -1;
241
242         ciErrNum = clGetDeviceIDs(
243                 platform,
244                 deviceType,
245                 num_entries,
246                 devices,
247                 &num_devices);
248
249         if (ciErrNum < 0)
250         {
251                 b3Printf("clGetDeviceIDs returned %d\n", ciErrNum);
252                 return 0;
253         }
254         cprops = (NULL == platform) ? NULL : cps;
255
256         if (!num_devices)
257                 return 0;
258
259         if (pGLContext)
260         {
261                 //search for the GPU that relates to the OpenCL context
262                 unsigned int i;
263                 for (i = 0; i < num_devices; i++)
264                 {
265                         retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum);
266                         if (ciErrNum == CL_SUCCESS)
267                                 break;
268                 }
269         }
270         else
271         {
272                 if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices)
273                 {
274                         //create a context of the preferred device index
275                         retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum);
276                 }
277                 else
278                 {
279                         //create a context of all devices
280 #if defined(__APPLE__)
281                         retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum);
282 #else
283                         b3Printf("numDevices=%d\n", num_devices);
284
285                         retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum);
286 #endif
287                 }
288         }
289         if (pErrNum != NULL)
290         {
291                 *pErrNum = ciErrNum;
292         };
293
294         return retContext;
295 }
296
297 cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
298 {
299 #ifdef B3_USE_CLEW
300         b3OpenCLUtils_clewInit();
301 #endif
302
303         cl_uint numPlatforms;
304         cl_context retContext = 0;
305         unsigned int i;
306
307         cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
308         if (ciErrNum != CL_SUCCESS)
309         {
310                 if (pErrNum != NULL) *pErrNum = ciErrNum;
311                 return NULL;
312         }
313         if (numPlatforms > 0)
314         {
315                 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
316                 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
317                 if (ciErrNum != CL_SUCCESS)
318                 {
319                         if (pErrNum != NULL)
320                                 *pErrNum = ciErrNum;
321                         free(platforms);
322                         return NULL;
323                 }
324
325                 for (i = 0; i < numPlatforms; ++i)
326                 {
327                         char pbuf[128];
328                         ciErrNum = clGetPlatformInfo(platforms[i],
329                                                                                  CL_PLATFORM_VENDOR,
330                                                                                  sizeof(pbuf),
331                                                                                  pbuf,
332                                                                                  NULL);
333                         if (ciErrNum != CL_SUCCESS)
334                         {
335                                 if (pErrNum != NULL) *pErrNum = ciErrNum;
336                                 return NULL;
337                         }
338
339                         if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex)
340                         {
341                                 cl_platform_id tmpPlatform = platforms[0];
342                                 platforms[0] = platforms[i];
343                                 platforms[i] = tmpPlatform;
344                                 break;
345                         }
346                         else
347                         {
348                                 if (!strcmp(pbuf, spPlatformVendor))
349                                 {
350                                         cl_platform_id tmpPlatform = platforms[0];
351                                         platforms[0] = platforms[i];
352                                         platforms[i] = tmpPlatform;
353                                 }
354                         }
355                 }
356
357                 for (i = 0; i < numPlatforms; ++i)
358                 {
359                         cl_platform_id platform = platforms[i];
360                         assert(platform);
361
362                         retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex);
363
364                         if (retContext)
365                         {
366                                 //                              printf("OpenCL platform details:\n");
367                                 b3OpenCLPlatformInfo platformInfo;
368
369                                 b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
370
371                                 if (retPlatformId)
372                                         *retPlatformId = platform;
373
374                                 break;
375                         }
376                 }
377
378                 free(platforms);
379         }
380         return retContext;
381 }
382
383 //////////////////////////////////////////////////////////////////////////////
384 //! Gets the id of the nth device from the context
385 //!
386 //! @return the id or -1 when out of range
387 //! @param cxMainContext         OpenCL context
388 //! @param device_idx            index of the device of interest
389 //////////////////////////////////////////////////////////////////////////////
390 cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
391 {
392         assert(cxMainContext);
393
394         size_t szParmDataBytes;
395         cl_device_id* cdDevices;
396         cl_device_id device;
397
398         // get the list of devices associated with context
399         clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
400
401         if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex)
402         {
403                 return (cl_device_id)-1;
404         }
405
406         cdDevices = (cl_device_id*)malloc(szParmDataBytes);
407
408         clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
409
410         device = cdDevices[deviceIndex];
411         free(cdDevices);
412
413         return device;
414 }
415
416 int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
417 {
418         size_t szParamDataBytes;
419         int device_count;
420         clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
421         device_count = (int)szParamDataBytes / sizeof(cl_device_id);
422         return device_count;
423 }
424
425 void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
426 {
427         // CL_DEVICE_NAME
428         clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
429
430         // CL_DEVICE_VENDOR
431         clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
432
433         // CL_DRIVER_VERSION
434         clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
435
436         // CL_DEVICE_INFO
437         clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
438
439         // CL_DEVICE_MAX_COMPUTE_UNITS
440         clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL);
441
442         // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
443         clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL);
444
445         // CL_DEVICE_MAX_WORK_ITEM_SIZES
446         clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL);
447
448         // CL_DEVICE_MAX_WORK_GROUP_SIZE
449         clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL);
450
451         // CL_DEVICE_MAX_CLOCK_FREQUENCY
452         clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL);
453
454         // CL_DEVICE_ADDRESS_BITS
455         clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL);
456
457         // CL_DEVICE_MAX_MEM_ALLOC_SIZE
458         clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL);
459
460         // CL_DEVICE_GLOBAL_MEM_SIZE
461         clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL);
462
463         // CL_DEVICE_ERROR_CORRECTION_SUPPORT
464         clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL);
465
466         // CL_DEVICE_LOCAL_MEM_TYPE
467         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL);
468
469         // CL_DEVICE_LOCAL_MEM_SIZE
470         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL);
471
472         // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
473         clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL);
474
475         // CL_DEVICE_QUEUE_PROPERTIES
476         clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL);
477
478         // CL_DEVICE_IMAGE_SUPPORT
479         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL);
480
481         // CL_DEVICE_MAX_READ_IMAGE_ARGS
482         clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL);
483
484         // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
485         clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL);
486
487         // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
488         clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL);
489         clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL);
490         clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL);
491         clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL);
492         clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
493
494         // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
495         clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
496
497         // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
498         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
499         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL);
500         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL);
501         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL);
502         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL);
503         clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL);
504 }
505
506 void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
507 {
508         b3OpenCLDeviceInfo info;
509         b3OpenCLUtils::getDeviceInfo(device, &info);
510         b3Printf("Device Info:\n");
511         b3Printf("  CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
512         b3Printf("  CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
513         b3Printf("  CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
514
515         if (info.m_deviceType & CL_DEVICE_TYPE_CPU)
516                 b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
517         if (info.m_deviceType & CL_DEVICE_TYPE_GPU)
518                 b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
519         if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR)
520                 b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
521         if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT)
522                 b3Printf("  CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
523
524         b3Printf("  CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
525         b3Printf("  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
526         b3Printf("  CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]);
527         b3Printf("  CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize);
528         b3Printf("  CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
529         b3Printf("  CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
530         b3Printf("  CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024)));
531         b3Printf("  CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024)));
532         b3Printf("  CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no");
533         b3Printf("  CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
534         b3Printf("  CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
535         b3Printf("  CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
536         if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
537                 b3Printf("  CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
538         if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE)
539                 b3Printf("  CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
540
541         b3Printf("  CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
542
543         b3Printf("  CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
544         b3Printf("  CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
545         b3Printf("\n  CL_DEVICE_IMAGE <dim>");
546         b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth);
547         b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight);
548         b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth);
549         b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight);
550         b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth);
551         if (*info.m_deviceExtensions != 0)
552         {
553                 b3Printf("\n  CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions);
554         }
555         else
556         {
557                 b3Printf("  CL_DEVICE_EXTENSIONS: None\n");
558         }
559         b3Printf("  CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
560         b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
561                          info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble);
562 }
563
/// Returns the part of `name` that follows the last occurrence of `pattern`.
///
/// The result points into `name`; nothing is copied or modified. When the
/// pattern does not occur, or is empty, `name` itself is returned.
///
/// @param name     NUL-terminated string to scan
/// @param pattern  NUL-terminated separator to look for
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	const char* tail = name;
	const char* patloc;

	// An empty pattern matches everywhere without advancing, which would
	// otherwise keep the loop below spinning forever.
	if (patlen == 0)
	{
		return name;
	}

	// Step past every occurrence; `tail` ends up just after the last one.
	while ((patloc = strstr(tail, pattern)) != NULL)
	{
		tail = patloc + patlen;
	}
	return tail;
}
577
578 cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching)
579 {
580         const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : "";
581
582         if (disableBinaryCaching)
583         {
584                 //kernelSourceOrg = 0;
585         }
586
587         cl_program m_cpProgram = 0;
588         cl_int status;
589
590         char binaryFileName[B3_MAX_STRING_LENGTH];
591
592         char deviceName[256];
593         char driverVersion[256];
594         const char* strippedName;
595         int fileUpToDate = 0;
596 #ifdef _WIN32
597         int binaryFileValid = 0;
598 #endif
599         if (!disableBinaryCaching && clFileNameForCaching)
600         {
601                 clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
602                 clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
603
604                 strippedName = strip2(clFileNameForCaching, "\\");
605                 strippedName = strip2(strippedName, "/");
606
607 #ifdef _MSC_VER
608                 sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
609 #else
610                 sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
611 #endif
612         }
613         if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource))
614         {
615 #ifdef _WIN32
616                 char* bla = 0;
617
618                 //printf("searching for %s\n", binaryFileName);
619
620                 FILETIME modtimeBinary;
621                 CreateDirectoryA(sCachedBinaryPath, 0);
622                 {
623                         HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
624                         if (binaryFileHandle == INVALID_HANDLE_VALUE)
625                         {
626                                 DWORD errorCode;
627                                 errorCode = GetLastError();
628                                 switch (errorCode)
629                                 {
630                                         case ERROR_FILE_NOT_FOUND:
631                                         {
632                                                 b3Warning("\nCached file not found %s\n", binaryFileName);
633                                                 break;
634                                         }
635                                         case ERROR_PATH_NOT_FOUND:
636                                         {
637                                                 b3Warning("\nCached file path not found %s\n", binaryFileName);
638                                                 break;
639                                         }
640                                         default:
641                                         {
642                                                 b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode);
643                                         }
644                                 }
645                         }
646                         else
647                         {
648                                 if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
649                                 {
650                                         DWORD errorCode;
651                                         errorCode = GetLastError();
652                                         b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
653                                 }
654                                 else
655                                 {
656                                         binaryFileValid = 1;
657                                 }
658                                 CloseHandle(binaryFileHandle);
659                         }
660
661                         if (binaryFileValid)
662                         {
663                                 HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
664
665                                 if (srcFileHandle == INVALID_HANDLE_VALUE)
666                                 {
667                                         const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"};
668                                         for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++)
669                                         {
670                                                 char relativeFileName[1024];
671                                                 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
672                                                 srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
673                                         }
674                                 }
675
676                                 if (srcFileHandle != INVALID_HANDLE_VALUE)
677                                 {
678                                         FILETIME modtimeSrc;
679                                         if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
680                                         {
681                                                 DWORD errorCode;
682                                                 errorCode = GetLastError();
683                                                 b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
684                                         }
685                                         if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
686                                         {
687                                                 fileUpToDate = 1;
688                                         }
689                                         else
690                                         {
691                                                 b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName);
692                                         }
693                                         CloseHandle(srcFileHandle);
694                                 }
695                                 else
696                                 {
697 #ifdef _DEBUG
698                                         DWORD errorCode;
699                                         errorCode = GetLastError();
700                                         switch (errorCode)
701                                         {
702                                                 case ERROR_FILE_NOT_FOUND:
703                                                 {
704                                                         b3Warning("\nSrc file not found %s\n", clFileNameForCaching);
705                                                         break;
706                                                 }
707                                                 case ERROR_PATH_NOT_FOUND:
708                                                 {
709                                                         b3Warning("\nSrc path not found %s\n", clFileNameForCaching);
710                                                         break;
711                                                 }
712                                                 default:
713                                                 {
714                                                         b3Warning("\nnSrc file reading errorCode = %d\n", errorCode);
715                                                 }
716                                         }
717
718                                         //we should make sure the src file exists so we can verify the timestamp with binary
719                                         //                                      assert(0);
720                                         b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName);
721                                         fileUpToDate = true;
722 #else
723                                         //if we cannot find the source, assume it is OK in release builds
724                                         fileUpToDate = true;
725 #endif
726                                 }
727                         }
728                 }
729
730 #else
731                 fileUpToDate = true;
732                 if (mkdir(sCachedBinaryPath, 0777) == -1)
733                 {
734                 }
735                 else
736                 {
737                         b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath);
738                 }
739 #endif  //_WIN32
740         }
741
742         if (fileUpToDate)
743         {
744 #ifdef _MSC_VER
745                 FILE* file;
746                 if (fopen_s(&file, binaryFileName, "rb") != 0)
747                         file = 0;
748 #else
749                 FILE* file = fopen(binaryFileName, "rb");
750 #endif
751
752                 if (file)
753                 {
754                         size_t binarySize = 0;
755                         char* binary = 0;
756
757                         fseek(file, 0L, SEEK_END);
758                         binarySize = ftell(file);
759                         rewind(file);
760                         binary = (char*)malloc(sizeof(char) * binarySize);
761                         int bytesRead;
762                         bytesRead = fread(binary, sizeof(char), binarySize, file);
763                         fclose(file);
764
765                         m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
766                         b3Assert(status == CL_SUCCESS);
767                         status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
768                         b3Assert(status == CL_SUCCESS);
769
770                         if (status != CL_SUCCESS)
771                         {
772                                 char* build_log;
773                                 size_t ret_val_size;
774                                 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
775                                 build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
776                                 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
777                                 build_log[ret_val_size] = '\0';
778                                 b3Error("%s\n", build_log);
779                                 free(build_log);
780                                 b3Assert(0);
781                                 m_cpProgram = 0;
782
783                                 b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName);
784                         }
785                         else
786                         {
787                                 b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName);
788                         }
789                         free(binary);
790                 }
791                 else
792                 {
793                         b3Warning("Cannot open cached binary: %s\n", binaryFileName);
794                 }
795         }
796
797         if (!m_cpProgram)
798         {
799                 cl_int localErrNum;
800                 char* compileFlags;
801                 int flagsize;
802
803                 const char* kernelSource = kernelSourceOrg;
804
805                 if (!kernelSourceOrg || gDebugForceLoadingFromSource)
806                 {
807                         if (clFileNameForCaching)
808                         {
809                                 FILE* file = fopen(clFileNameForCaching, "rb");
810                                 //in many cases the relative path is a few levels up the directory hierarchy, so try it
811                                 if (!file)
812                                 {
813                                         const char* prefix[] = {"../", "../../", "../../../", "../../../../"};
814                                         for (int i = 0; !file && i < 3; i++)
815                                         {
816                                                 char relativeFileName[1024];
817                                                 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
818                                                 file = fopen(relativeFileName, "rb");
819                                         }
820                                 }
821
822                                 if (file)
823                                 {
824                                         char* kernelSrc = 0;
825                                         fseek(file, 0L, SEEK_END);
826                                         int kernelSize = ftell(file);
827                                         rewind(file);
828                                         kernelSrc = (char*)malloc(kernelSize + 1);
829                                         int readBytes;
830                                         readBytes = fread((void*)kernelSrc, 1, kernelSize, file);
831                                         kernelSrc[kernelSize] = 0;
832                                         fclose(file);
833                                         kernelSource = kernelSrc;
834                                 }
835                         }
836                 }
837
838                 size_t program_length = kernelSource ? strlen(kernelSource) : 0;
839 #ifdef MAC  //or __APPLE__?
840                 char* flags = "-cl-mad-enable -DMAC ";
841 #else
842                 const char* flags = "";
843 #endif
844
845                 m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
846                 if (localErrNum != CL_SUCCESS)
847                 {
848                         if (pErrNum)
849                                 *pErrNum = localErrNum;
850                         return 0;
851                 }
852
853                 // Build the program with 'mad' Optimization option
854
855                 flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5);
856                 compileFlags = (char*)malloc(flagsize);
857 #ifdef _MSC_VER
858                 sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros);
859 #else
860                 sprintf(compileFlags, "%s %s", flags, additionalMacros);
861 #endif
862                 localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
863                 if (localErrNum != CL_SUCCESS)
864                 {
865                         char* build_log;
866                         size_t ret_val_size;
867                         clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
868                         build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
869                         clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
870
871                         // to be carefully, terminate with \0
872                         // there's no information in the reference whether the string is 0 terminated or not
873                         build_log[ret_val_size] = '\0';
874
875                         b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
876                         free(build_log);
877                         if (pErrNum)
878                                 *pErrNum = localErrNum;
879                         return 0;
880                 }
881
882                 if (!disableBinaryCaching && clFileNameForCaching)
883                 {  //   write to binary
884
885                         cl_uint numAssociatedDevices;
886                         status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
887                         b3Assert(status == CL_SUCCESS);
888                         if (numAssociatedDevices == 1)
889                         {
890                                 size_t binarySize;
891                                 char* binary;
892
893                                 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
894                                 b3Assert(status == CL_SUCCESS);
895
896                                 binary = (char*)malloc(sizeof(char) * binarySize);
897
898                                 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
899                                 b3Assert(status == CL_SUCCESS);
900
901                                 {
902                                         FILE* file = 0;
903 #ifdef _MSC_VER
904                                         if (fopen_s(&file, binaryFileName, "wb") != 0)
905                                                 file = 0;
906 #else
907                                         file = fopen(binaryFileName, "wb");
908 #endif
909                                         if (file)
910                                         {
911                                                 fwrite(binary, sizeof(char), binarySize, file);
912                                                 fclose(file);
913                                         }
914                                         else
915                                         {
916                                                 b3Warning("cannot write file %s\n", binaryFileName);
917                                         }
918                                 }
919
920                                 free(binary);
921                         }
922                 }
923
924                 free(compileFlags);
925         }
926         return m_cpProgram;
927 }
928
929 cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros)
930 {
931         cl_kernel kernel;
932         cl_int localErrNum;
933
934         cl_program m_cpProgram = prog;
935
936         b3Printf("compiling kernel %s ", kernelName);
937
938         if (!m_cpProgram)
939         {
940                 m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false);
941         }
942
943         // Create the kernel
944         kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
945         if (localErrNum != CL_SUCCESS)
946         {
947                 b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
948                 assert(0);
949                 if (pErrNum)
950                         *pErrNum = localErrNum;
951                 return 0;
952         }
953
954         if (!prog && m_cpProgram)
955         {
956                 clReleaseProgram(m_cpProgram);
957         }
958         b3Printf("ready. \n");
959
960         if (pErrNum)
961                 *pErrNum = CL_SUCCESS;
962         return kernel;
963 }