2 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
3 Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
16 //Original author: Roman Ponomarev
17 //Mostly Reimplemented by Erwin Coumans
// Debug switch read by b3OpenCLUtils_compileCLProgramFromString: when true the cached-binary
// lookup is bypassed and the kernel text is re-read from the file named by clFileNameForCaching,
// even if the caller supplied the source as a string.
19 bool gDebugForceLoadingFromSource = false;
// Debug switch read by b3OpenCLUtils_compileCLProgramFromString: when true the cached-binary
// lookup is bypassed (the in-memory source string, if any, is still used).
20 bool gDebugSkipLoadingBinary = false;
22 #include "Bullet3Common/b3Logging.h"
27 #pragma warning(disable : 4996)
29 #include "b3OpenCLUtils.h"
30 //#include "b3OpenCLInclude.h"
// Capacity of the fixed-size cl_device_id array used by b3OpenCLUtils_createContextFromPlatform;
// it is also the num_entries value handed to clGetDeviceIDs there.
35 #define B3_MAX_CL_DEVICES 16 //who needs 16 devices?
// In this file b3Assert is simply the standard assert macro.
42 #define b3Assert assert
// Directory used as the prefix of cached kernel binary file names ("<dir>/<file>.<device>.<driver>.bin").
// Replaced at runtime through b3OpenCLUtils_setCachePath.
48 static const char* sCachedBinaryPath = "cache";
50 //Set the preferred platform vendor using the OpenCL SDK
// The string is chosen at compile time by the first defined macro in the chain below.
// b3OpenCLUtils_createContextFromType strcmp()s it against a per-platform string and moves a
// matching platform to the front of the candidate list.
51 static const char* spPlatformVendor =
52 #if defined(CL_PLATFORM_MINI_CL)
54 #elif defined(CL_PLATFORM_AMD)
55 "Advanced Micro Devices, Inc.";
56 #elif defined(CL_PLATFORM_NVIDIA)
58 #elif defined(CL_PLATFORM_INTEL)
59 "Intel(R) Corporation";
60 #elif defined(B3_USE_CLEW)
61 "clew (OpenCL Extension Wrangler library)";
66 #ifndef CL_PLATFORM_MINI_CL
// Notification callback handed to clCreateContext on __APPLE__ builds
// (see b3OpenCLUtils_createContextFromPlatform).
// errstr:       message text supplied by the OpenCL runtime.
// private_info: opaque data supplied with the message; not referenced in the visible lines.
// NOTE(review): the remaining parameters and the branch structure are outside the visible lines.
74 void MyFatalBreakAPPLE(const char* errstr,
75 const void* private_info,
// Classify the message by searching for the substring "Warning".
79 const char* patloc = strstr(errstr, "Warning");
80 //find out if it is a warning or error, exit if error
// Warning path: log only.
84 b3Warning("Warning: %s\n", errstr);
// Error path: log through b3Error. NOTE(review): the comment above says "exit if error";
// the exit itself is not visible here — confirm.
88 b3Error("Error: %s\n", errstr);
// Loads the OpenCL runtime through clew.
// The library name `cl` is picked per platform: "OpenCL.dll", the macOS framework binary, or
// "libOpenCL.so.1" otherwise. The outcome of clewInit(cl) is logged either as an error code or
// as a success message naming the library.
// Returns an int; NOTE(review): the return statement is outside the visible lines — presumably
// the clewInit result. Confirm.
95 int b3OpenCLUtils_clewInit()
100 const char* cl = "OpenCL.dll";
101 #elif defined __APPLE__
102 const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
103 #else //presumable Linux? \
104 //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so
105 const char* cl = "libOpenCL.so.1";
// First attempt, made only in the fallback (non-Windows, non-Apple) branch.
106 result = clewInit(cl);
// NOTE(review): the body of this check is not visible — presumably it switches `cl` to an
// alternative library name before the common call below. Confirm.
107 if (result != CLEW_SUCCESS)
// Common attempt for all platforms.
116 result = clewInit(cl);
117 if (result != CLEW_SUCCESS)
119 b3Error("clewInit failed with error code %d\n", result);
123 b3Printf("clewInit succesfull using %s\n", cl);
// Asks clGetPlatformIDs how many OpenCL platforms are available.
// pErrNum: out-parameter for the OpenCL status. NOTE(review): the write to it and the return
// statement are outside the visible lines — presumably numPlatforms is returned. Confirm.
129 int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
// Run the clew loader before the first OpenCL call.
132 b3OpenCLUtils_clewInit();
// Fixed scratch array: at most 10 platform ids are fetched by the query below.
135 cl_platform_id pPlatforms[10] = {0};
137 cl_uint numPlatforms = 0;
138 cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms);
139 //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
141 if (ciErrNum != CL_SUCCESS)
149 const char* b3OpenCLUtils_getSdkVendorName()
151 return spPlatformVendor;
154 void b3OpenCLUtils_setCachePath(const char* path)
156 sCachedBinaryPath = path;
// Looks up the platform id at position platformIndex0 in the list reported by clGetPlatformIDs.
// platformIndex0: zero-based index; it is converted to unsigned below, so a negative value
//                 becomes a very large number and fails the range check.
// pErrNum:        out-parameter for the OpenCL status. NOTE(review): the writes to it, the
//                 release of `platforms` and the return statement are outside the visible lines.
159 cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
// Run the clew loader before the first OpenCL call.
162 b3OpenCLUtils_clewInit();
// Stays 0 when the index is out of range.
165 cl_platform_id platform = 0;
166 unsigned int platformIndex = (unsigned int)platformIndex0;
167 cl_uint numPlatforms;
// Count-only query. NOTE(review): its status is overwritten by the second query below without
// a visible check, and numPlatforms has no initialiser if this call fails.
168 cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
170 if (platformIndex < numPlatforms)
// NOTE(review): the malloc result is used without a visible NULL check.
172 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
173 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
174 if (ciErrNum != CL_SUCCESS)
181 platform = platforms[platformIndex];
// Fills *platformInfo with the vendor, name and version strings of `platform`.
// Each string is requested with a capacity of B3_MAX_STRING_LENGTH bytes, and every
// clGetPlatformInfo status is passed through oclCHECKERROR against CL_SUCCESS.
// NOTE(review): the declaration of ciErrNum is outside the visible lines.
189 void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
// Vendor string.
193 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL);
194 oclCHECKERROR(ciErrNum, CL_SUCCESS);
// Platform name.
195 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL);
196 oclCHECKERROR(ciErrNum, CL_SUCCESS);
// Platform version string.
197 ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL);
198 oclCHECKERROR(ciErrNum, CL_SUCCESS);
201 void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
203 b3OpenCLPlatformInfo platformInfo;
204 b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
205 b3Printf("Platform info:\n");
206 b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor);
207 b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName);
208 b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion);
// Creates an OpenCL context on `platform` for devices of `deviceType`.
// platform:               platform to use; when NULL no property list is passed to clCreateContext.
// deviceType:             device class; NOTE(review): presumably forwarded to clGetDeviceIDs, whose
//                         argument list is outside the visible lines.
// pErrNum:                out-parameter for the OpenCL status (writes not visible here).
// pGLContext, pGLDC:      when both are non-NULL, GL-sharing entries are added to the property list.
// preferredDeviceIndex:   when it is a valid index into the enumerated devices, a single-device
//                         context is created for that device.
// preferredPlatformIndex: not referenced in the visible lines.
// NOTE(review): several conditions, preprocessor guards and the return statement are outside the
// visible lines; the comments below describe only what is shown.
211 cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
213 cl_context retContext = 0;
// Receives up to B3_MAX_CL_DEVICES device ids.
216 cl_device_id devices[B3_MAX_CL_DEVICES];
218 cl_context_properties* cprops;
221 * If we could find our platform, use it. Otherwise pass a NULL and get whatever the
222 * implementation thinks we should be using.
// Zero-filled property list: slots 0-1 name the platform, slots 2-5 are optional GL-sharing
// entries, and the remaining zero acts as the list terminator.
224 cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0};
225 cps[0] = CL_CONTEXT_PLATFORM;
226 cps[1] = (cl_context_properties)platform;
229 if (pGLContext && pGLDC)
231 cps[2] = CL_GL_CONTEXT_KHR;
232 cps[3] = (cl_context_properties)pGLContext;
233 cps[4] = CL_WGL_HDC_KHR;
234 cps[5] = (cl_context_properties)pGLDC;
238 num_entries = B3_MAX_CL_DEVICES;
242 ciErrNum = clGetDeviceIDs(
251 b3Printf("clGetDeviceIDs returned %d\n", ciErrNum);
254 cprops = (NULL == platform) ? NULL : cps;
261 //search for the GPU that relates to the OpenCL context
// Tries the devices one at a time. NOTE(review): the condition selecting this strategy and the
// action taken on success are not visible.
263 for (i = 0; i < num_devices; i++)
265 retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum);
266 if (ciErrNum == CL_SUCCESS)
// The range check guards the devices[] access below.
272 if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices)
274 //create a context of the preferred device index
275 retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum);
279 //create a context of all devices
// On Apple builds MyFatalBreakAPPLE is installed as the context notification callback.
280 #if defined(__APPLE__)
281 retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum);
283 b3Printf("numDevices=%d\n", num_devices);
285 retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum);
// Creates an OpenCL context for `deviceType` by trying the available platforms in priority order.
// pErrNum:                optional out-parameter; receives the failing status of the platform queries.
// pGLContext, pGLDC,
// preferredDeviceIndex:   forwarded unchanged to b3OpenCLUtils_createContextFromPlatform.
// preferredPlatformIndex: when >= 0, the platform at that index is swapped to the front of the list.
// retPlatformId:          receives the platform that was used. NOTE(review): no NULL check is
//                         visible before the write — confirm.
// NOTE(review): early returns, the success test inside the final loop, the release of `platforms`
// and the return statement are outside the visible lines.
297 cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
// Run the clew loader before the first OpenCL call.
300 b3OpenCLUtils_clewInit();
303 cl_uint numPlatforms;
304 cl_context retContext = 0;
// Count-only query.
307 cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
308 if (ciErrNum != CL_SUCCESS)
310 if (pErrNum != NULL) *pErrNum = ciErrNum;
313 if (numPlatforms > 0)
// NOTE(review): the malloc result is used without a visible NULL check.
315 cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
316 ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
317 if (ciErrNum != CL_SUCCESS)
// Pass 1: reorder the list so that the preferred platform ends up in slot 0.
325 for (i = 0; i < numPlatforms; ++i)
// NOTE(review): the argument list is not visible — presumably it fills `pbuf` with the
// platform's vendor string. Confirm.
328 ciErrNum = clGetPlatformInfo(platforms[i],
333 if (ciErrNum != CL_SUCCESS)
335 if (pErrNum != NULL) *pErrNum = ciErrNum;
// An explicitly requested platform index is swapped into slot 0.
339 if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex)
341 cl_platform_id tmpPlatform = platforms[0];
342 platforms[0] = platforms[i];
343 platforms[i] = tmpPlatform;
// A platform whose string equals the compile-time SDK vendor is swapped into slot 0 as well.
348 if (!strcmp(pbuf, spPlatformVendor))
350 cl_platform_id tmpPlatform = platforms[0];
351 platforms[0] = platforms[i];
352 platforms[i] = tmpPlatform;
// Pass 2: try to create a context on each platform, in the order established above.
357 for (i = 0; i < numPlatforms; ++i)
359 cl_platform_id platform = platforms[i];
362 retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex);
366 // printf("OpenCL platform details:\n");
367 b3OpenCLPlatformInfo platformInfo;
369 b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
// Report the chosen platform back to the caller.
372 *retPlatformId = platform;
383 //////////////////////////////////////////////////////////////////////////////
384 //! Gets the id of the nth device from the context
386 //! @return the id or -1 when out of range
387 //! @param cxMainContext OpenCL context
388 //! @param device_idx index of the device of interest
389 //////////////////////////////////////////////////////////////////////////////
390 cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
392 assert(cxMainContext);
394 size_t szParmDataBytes;
395 cl_device_id* cdDevices;
398 // get the list of devices associated with context
399 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
401 if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex)
403 return (cl_device_id)-1;
406 cdDevices = (cl_device_id*)malloc(szParmDataBytes);
408 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
410 device = cdDevices[deviceIndex];
416 int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
418 size_t szParamDataBytes;
420 clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
421 device_count = (int)szParamDataBytes / sizeof(cl_device_id);
425 void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
428 clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
431 clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
434 clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
437 clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
439 // CL_DEVICE_MAX_COMPUTE_UNITS
440 clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL);
442 // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
443 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL);
445 // CL_DEVICE_MAX_WORK_ITEM_SIZES
446 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL);
448 // CL_DEVICE_MAX_WORK_GROUP_SIZE
449 clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL);
451 // CL_DEVICE_MAX_CLOCK_FREQUENCY
452 clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL);
454 // CL_DEVICE_ADDRESS_BITS
455 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL);
457 // CL_DEVICE_MAX_MEM_ALLOC_SIZE
458 clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL);
460 // CL_DEVICE_GLOBAL_MEM_SIZE
461 clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL);
463 // CL_DEVICE_ERROR_CORRECTION_SUPPORT
464 clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL);
466 // CL_DEVICE_LOCAL_MEM_TYPE
467 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL);
469 // CL_DEVICE_LOCAL_MEM_SIZE
470 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL);
472 // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
473 clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL);
475 // CL_DEVICE_QUEUE_PROPERTIES
476 clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL);
478 // CL_DEVICE_IMAGE_SUPPORT
479 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL);
481 // CL_DEVICE_MAX_READ_IMAGE_ARGS
482 clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL);
484 // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
485 clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL);
487 // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
488 clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL);
489 clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL);
490 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL);
491 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL);
492 clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
494 // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
495 clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
497 // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
498 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
499 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL);
500 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL);
501 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL);
502 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL);
503 clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL);
506 void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
508 b3OpenCLDeviceInfo info;
509 b3OpenCLUtils::getDeviceInfo(device, &info);
510 b3Printf("Device Info:\n");
511 b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
512 b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
513 b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
515 if (info.m_deviceType & CL_DEVICE_TYPE_CPU)
516 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
517 if (info.m_deviceType & CL_DEVICE_TYPE_GPU)
518 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
519 if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR)
520 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
521 if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT)
522 b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
524 b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
525 b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
526 b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]);
527 b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize);
528 b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
529 b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
530 b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024)));
531 b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024)));
532 b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no");
533 b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
534 b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
535 b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
536 if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
537 b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
538 if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE)
539 b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
541 b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
543 b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
544 b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
545 b3Printf("\n CL_DEVICE_IMAGE <dim>");
546 b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth);
547 b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight);
548 b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth);
549 b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight);
550 b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth);
551 if (*info.m_deviceExtensions != 0)
553 b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions);
557 b3Printf(" CL_DEVICE_EXTENSIONS: None\n");
559 b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
560 b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
561 info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble);
// Returns a pointer to the part of `name` that follows the last occurrence of
// `pattern`, or `name` itself when the pattern does not occur.
//
// The result points into `name`; nothing is copied or allocated. Called with
// "\\" and then "/" it reduces a kernel file path to its bare file name.
//
// name:    NUL-terminated string to scan.
// pattern: NUL-terminated separator to skip past. An empty pattern strips
//          nothing (it used to loop forever, because strstr matches an empty
//          needle without the cursor ever advancing).
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	const char* tail = name;
	const char* hit;

	if (patlen == 0)
	{
		return name;
	}

	// Hop over every occurrence so that `tail` ends up just after the last one.
	while ((hit = strstr(tail, pattern)) != NULL)
	{
		tail = hit + patlen;
	}
	return tail;
}
// Builds an OpenCL program for `device`, optionally going through an on-disk cache of binaries.
// clContext / device:    context and device the program is created and built for.
// kernelSourceOrg:       kernel source text; when NULL the text is read from clFileNameForCaching.
// pErrNum:               out-parameter receiving the failing OpenCL status of program creation/build.
// additionalMacrosArg:   extra compiler options; NULL is treated as an empty string.
// clFileNameForCaching:  source file name; used to derive the cache file name, to compare
//                        timestamps, and as the fallback location of the source text. May be NULL.
// disableBinaryCaching:  when true neither the cache lookup nor the cache write is performed.
// NOTE(review): many braces, preprocessor guards, early returns and the final return statement are
// outside the visible lines; the comments below describe only what is shown.
578 cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching)
580 const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : "";
// Only a commented-out statement is visible inside this check.
582 if (disableBinaryCaching)
584 //kernelSourceOrg = 0;
587 cl_program m_cpProgram = 0;
590 char binaryFileName[B3_MAX_STRING_LENGTH];
592 char deviceName[256];
593 char driverVersion[256];
594 const char* strippedName;
595 int fileUpToDate = 0;
597 int binaryFileValid = 0;
// Derive the cache file name: "<cache dir>/<source file name>.<device name>.<driver version>.bin".
599 if (!disableBinaryCaching && clFileNameForCaching)
601 clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
602 clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
// Drop any directory part, for both separator styles.
604 strippedName = strip2(clFileNameForCaching, "\\");
605 strippedName = strip2(strippedName, "/");
// Two formatting variants; NOTE(review): the preprocessor lines choosing between them are not
// visible. The plain sprintf variant does not bound the write to binaryFileName.
608 sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
610 sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
// The cache lookup is attempted only with a file name and with caching and both debug switches off.
613 if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource))
618 //printf("searching for %s\n", binaryFileName);
// Win32 path: make sure the cache directory exists, then read the cached binary's modification time.
620 FILETIME modtimeBinary;
621 CreateDirectoryA(sCachedBinaryPath, 0);
623 HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
624 if (binaryFileHandle == INVALID_HANDLE_VALUE)
627 errorCode = GetLastError();
630 case ERROR_FILE_NOT_FOUND:
632 b3Warning("\nCached file not found %s\n", binaryFileName);
635 case ERROR_PATH_NOT_FOUND:
637 b3Warning("\nCached file path not found %s\n", binaryFileName);
642 b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode);
648 if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
651 errorCode = GetLastError();
652 b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
658 CloseHandle(binaryFileHandle);
// Locate the kernel source file so its timestamp can be compared with the binary's; if it is
// not found as given, up to five relative prefixes are tried.
663 HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
665 if (srcFileHandle == INVALID_HANDLE_VALUE)
667 const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"};
668 for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++)
// NOTE(review): unbounded sprintf into a 1024-byte buffer.
670 char relativeFileName[1024];
671 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
672 srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
676 if (srcFileHandle != INVALID_HANDLE_VALUE)
679 if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
682 errorCode = GetLastError();
683 b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
// True when the source is not newer than the cached binary (64-bit FILETIME compared as
// high/low halves). NOTE(review): the branch bodies are not visible — presumably fileUpToDate
// is set here. Confirm.
685 if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
691 b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName);
693 CloseHandle(srcFileHandle);
699 errorCode = GetLastError();
702 case ERROR_FILE_NOT_FOUND:
704 b3Warning("\nSrc file not found %s\n", clFileNameForCaching);
707 case ERROR_PATH_NOT_FOUND:
709 b3Warning("\nSrc path not found %s\n", clFileNameForCaching);
714 b3Warning("\nnSrc file reading errorCode = %d\n", errorCode);
718 //we should make sure the src file exists so we can verify the timestamp with binary
720 b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName);
723 //if we cannot find the source, assume it is OK in release builds
// mkdir-based variant: create the cache directory with mode 0777.
732 if (mkdir(sCachedBinaryPath, 0777) == -1)
737 b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath);
// Open the cached binary (fopen_s or fopen variant).
746 if (fopen_s(&file, binaryFileName, "rb") != 0)
749 FILE* file = fopen(binaryFileName, "rb");
754 size_t binarySize = 0;
// The file size is taken from the end-of-file position, then the whole file is read into memory.
757 fseek(file, 0L, SEEK_END);
758 binarySize = ftell(file);
760 binary = (char*)malloc(sizeof(char) * binarySize);
762 bytesRead = fread(binary, sizeof(char), binarySize, file);
// Create the program from the cached binary and build it for this device.
765 m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
766 b3Assert(status == CL_SUCCESS);
767 status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
// When asserts are enabled this fires before the error reporting below can run.
768 b3Assert(status == CL_SUCCESS);
770 if (status != CL_SUCCESS)
// Fetch the build log: size first, then the text; one extra byte is reserved for the terminator.
774 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
775 build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
776 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
777 build_log[ret_val_size] = '\0';
778 b3Error("%s\n", build_log);
783 b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName);
787 b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName);
793 b3Warning("Cannot open cached binary: %s\n", binaryFileName);
// Source path: start from the caller-supplied string.
803 const char* kernelSource = kernelSourceOrg;
// Without an in-memory source (or when forced by the debug switch) read the text from the file.
805 if (!kernelSourceOrg || gDebugForceLoadingFromSource)
807 if (clFileNameForCaching)
809 FILE* file = fopen(clFileNameForCaching, "rb");
810 //in many cases the relative path is a few levels up the directory hierarchy, so try it
// NOTE(review): four prefixes are listed but the loop stops at i < 3, so "../../../../" is
// never tried.
813 const char* prefix[] = {"../", "../../", "../../../", "../../../../"};
814 for (int i = 0; !file && i < 3; i++)
816 char relativeFileName[1024];
817 sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
818 file = fopen(relativeFileName, "rb");
// Read the whole file and NUL-terminate it. NOTE(review): the ftell result is stored in an int
// and used without a visible check for -1.
825 fseek(file, 0L, SEEK_END);
826 int kernelSize = ftell(file);
828 kernelSrc = (char*)malloc(kernelSize + 1);
830 readBytes = fread((void*)kernelSrc, 1, kernelSize, file);
831 kernelSrc[kernelSize] = 0;
833 kernelSource = kernelSrc;
838 size_t program_length = kernelSource ? strlen(kernelSource) : 0;
// Built-in compiler flags: a MAC-specific set, or none.
839 #ifdef MAC //or __APPLE__?
840 char* flags = "-cl-mad-enable -DMAC ";
842 const char* flags = "";
845 m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
846 if (localErrNum != CL_SUCCESS)
849 *pErrNum = localErrNum;
853 // Build the program with 'mad' Optimization option
// Room for both option strings, the separating space, the terminator and some slack.
855 flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5);
856 compileFlags = (char*)malloc(flagsize);
858 sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros);
860 sprintf(compileFlags, "%s %s", flags, additionalMacros);
862 localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
863 if (localErrNum != CL_SUCCESS)
867 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
868 build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
869 clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
871 // to be carefully, terminate with \0
872 // there's no information in the reference whether the string is 0 terminated or not
873 build_log[ret_val_size] = '\0';
875 b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
878 *pErrNum = localErrNum;
// Write the freshly built binary back to the cache, only for programs tied to exactly one device.
882 if (!disableBinaryCaching && clFileNameForCaching)
885 cl_uint numAssociatedDevices;
886 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
887 b3Assert(status == CL_SUCCESS);
888 if (numAssociatedDevices == 1)
// Query the binary size, allocate a buffer of that size, then let OpenCL copy the binary into it.
893 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
894 b3Assert(status == CL_SUCCESS);
896 binary = (char*)malloc(sizeof(char) * binarySize);
898 status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
899 b3Assert(status == CL_SUCCESS);
904 if (fopen_s(&file, binaryFileName, "wb") != 0)
907 file = fopen(binaryFileName, "wb");
911 fwrite(binary, sizeof(char), binarySize, file);
916 b3Warning("cannot write file %s\n", binaryFileName);
// Creates the kernel named kernelName, compiling the program first when none is supplied.
// clContext / device: forwarded to b3OpenCLUtils_compileCLProgramFromString.
// kernelSource:       kernel source text used when a program has to be compiled here.
// kernelName:         name of the kernel function to look up in the program.
// pErrNum:            out-parameter: receives the clCreateKernel status on failure and CL_SUCCESS
//                     at the end of the success path.
// prog:               program to take the kernel from. NOTE(review): the condition guarding the
//                     compile call below is not visible — presumably it runs only when prog is null.
// additionalMacros:   extra compiler options forwarded to the program compilation.
// NOTE(review): local declarations, the pErrNum NULL checks and the return statements are outside
// the visible lines.
929 cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros)
934 cl_program m_cpProgram = prog;
936 b3Printf("compiling kernel %s ", kernelName);
// Compiled with no cache file name (0) and disableBinaryCaching == false.
940 m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false);
944 kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
945 if (localErrNum != CL_SUCCESS)
947 b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
// NOTE(review): if this failure path returns before the release below, a program compiled by
// this function would not be released — confirm.
950 *pErrNum = localErrNum;
// Release the program only when it was created here, never one passed in by the caller.
954 if (!prog && m_cpProgram)
956 clReleaseProgram(m_cpProgram);
958 b3Printf("ready. \n");
961 *pErrNum = CL_SUCCESS;