1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
19 // Guoping Long, longguoping@gmail.com
20 // Niko Li, newlife20080214@gmail.com
21 // Yao Wang, bitwangyaoyao@gmail.com
22 // Redistribution and use in source and binary forms, with or without modification,
23 // are permitted provided that the following conditions are met:
25 // * Redistribution's of source code must retain the above copyright notice,
26 // this list of conditions and the following disclaimer.
28 // * Redistribution's in binary form must reproduce the above copyright notice,
29 // this list of conditions and the following disclaimer in the documentation
30 // and/or other materials provided with the distribution.
32 // * The name of the copyright holders may not be used to endorse or promote products
33 // derived from this software without specific prior written permission.
35 // This software is provided by the copyright holders and contributors "as is" and
36 // any express or implied warranties, including, but not limited to, the implied
37 // warranties of merchantability and fitness for a particular purpose are disclaimed.
38 // In no event shall the Intel Corporation or contributors be liable for any direct,
39 // indirect, incidental, special, exemplary, or consequential damages
40 // (including, but not limited to, procurement of substitute goods or services;
41 // loss of use, data, or profits; or business interruption) however caused
42 // and on any theory of liability, whether in contract, strict liability,
43 // or tort (including negligence or otherwise) arising in any way out of
44 // the use of this software, even if advised of the possibility of such damage.
48 #include "precomp.hpp"
51 #include "cl_programcache.hpp"
53 #include "opencv2/ocl/private/opencl_utils.hpp"
// Two mutexes that are accessed below as members of the file-local `__module` object
// (the declaration of its type is not visible in this excerpt — TODO confirm layout).
// initializationMutex is handed out by getInitializationMutex(); currentContextMutex is
// locked by ContextImpl::cleanupContext() and ContextImpl::setContext().
// NOTE(review): identifiers containing a double underscore are reserved for the C++
// implementation; consider renaming when this file is next touched.
62 cv::Mutex initializationMutex;
63 cv::Mutex currentContextMutex;
// Single file-local instance that owns the mutexes above.
65 static __Module __module;
// Returns a reference to __module.initializationMutex.
// Context::getContext() locks this mutex around its lazy initialization steps.
67 cv::Mutex& getInitializationMutex()
69 return __module.initializationMutex;
// Internal per-platform record; instances are stored in global_platforms.
// NOTE(review): later code also accesses an `info` member of this struct, whose
// declaration is not visible in this excerpt.
73 struct PlatformInfoImpl
// Raw OpenCL handle of the platform.
75 cl_platform_id platform_id;
// Indices into global_devices for the devices that belong to this platform
// (filled in by initializeOpenCLDevices()).
77 std::vector<int> deviceIDs;
// NOTE(review): the declaration these lines belong to is not visible here. The
// platform_id/device_id pair matches how DeviceInfoImpl is used later in the file
// (initializeOpenCLDevices, ContextImpl::setContext) — confirm.
// Raw OpenCL handles of the owning platform and of the device itself.
89 cl_platform_id platform_id;
90 cl_device_id device_id;
// Constructor initializer list: both handles start out as NULL.
95 : platform_id(NULL), device_id(NULL)
// File-local tables of the discovered platforms and devices. Both are populated by
// initializeOpenCLDevices(); the `_id` stored in each public info struct is the
// element's index in the corresponding table.
100 static std::vector<PlatformInfoImpl> global_platforms;
101 static std::vector<DeviceInfoImpl> global_devices;
// Extracts the numeric "<major>.<minor>" pair from an OpenCL version string such as
// "OpenCL 1.2 AMD-APP (1084.4)" or "OpenCL C 1.1 ".
//
// The version is taken from the first space-separated token that begins with a digit;
// everything after the next space (the vendor-specific part) is ignored. A string that
// ends right after the minor number (no trailing space) is accepted as well.
//
// @param versionStr  version string as reported by the OpenCL runtime
// @param major       receives the major version, or 0 on failure
// @param minor       receives the minor version, or 0 on failure
// @return true when a "<major>.<minor>" token was found, false otherwise
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
{
    major = 0;
    minor = 0;

    // Locate the space that immediately precedes the first token starting with a digit.
    size_t p0 = versionStr.find(' ');
    while (p0 != std::string::npos && p0 + 1 < versionStr.length())
    {
        // isdigit() has undefined behaviour for negative values, so go through
        // unsigned char before the implicit conversion to int.
        if (isdigit(static_cast<unsigned char>(versionStr[p0 + 1])))
            break;
        p0 = versionStr.find(' ', p0 + 1);
    }
    if (p0 == std::string::npos)
        return false;

    const size_t p1 = versionStr.find('.', p0);
    if (p1 == std::string::npos)
        return false;

    // The minor number ends at the next space, or at the end of the string when the
    // vendor-specific suffix is absent.
    size_t p2 = versionStr.find(' ', p1);
    if (p2 == std::string::npos)
        p2 = versionStr.length();

    const std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
    const std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
    major = atoi(majorStr.c_str());
    minor = atoi(minorStr.c_str());
    return true;
}
// Appends the fields of `s`, separated by `delim`, to `elems`.
// Uses std::getline, so empty fields between two delimiters are kept, while an empty
// input string or a trailing delimiter adds no element.
static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
}

// Convenience overload: returns the fields of `s` as a new vector.
static std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}

// Parses a device configuration string.
//
// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
// Sample: AMD:GPU:Tahiti
// Sample: :GPU|CPU: = '' = ':' = '::'
//
// @param configurationStr  string to parse (at most two ':' separators)
// @param platform          receives the text before the first ':' (or the whole string)
// @param deviceTypes       receives the '|'-separated entries of the second field;
//                          empty when that field is absent or empty
// @param deviceNameOrID    receives the third field; left untouched when the string
//                          has fewer than two ':' separators
// @return false (after printing a message to std::cerr) when the string contains more
//         than two ':' separators, true otherwise
static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
        std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
{
    std::string deviceTypesStr;
    const size_t p0 = configurationStr.find(':');
    if (p0 == std::string::npos)
    {
        // no separator: assume only platform
        platform = configurationStr;
    }
    else
    {
        const size_t p1 = configurationStr.find(':', p0 + 1);
        if (p1 == std::string::npos)
        {
            // one separator: assume platform + device types
            platform = configurationStr.substr(0, p0);
            deviceTypesStr = configurationStr.substr(p0 + 1);
        }
        else
        {
            if (configurationStr.find(':', p1 + 1) != std::string::npos)
            {
                std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
                return false;
            }
            // two separators: assume platform + device types + device name/id
            platform = configurationStr.substr(0, p0);
            deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1));
            deviceNameOrID = configurationStr.substr(p1 + 1);
        }
    }
    deviceTypes = split(deviceTypesStr, '|');
    return true;
}
// Becomes true once device selection has been started by selectOpenCLDevice() or a
// device has been chosen explicitly through setDevice().
190 static bool __deviceSelected = false;
// Chooses an OpenCL device according to the OPENCV_OPENCL_DEVICE environment variable
// (format: see parseOpenCLDeviceConfiguration) and activates it with setDevice().
// Diagnostics are written to std::cerr. Context::getContext() treats a false result
// as a fatal "Can't select OpenCL device" error.
// NOTE(review): several control-flow lines of this function (error exits, the
// handling around setDevice(), some branch conditions) are not visible in this
// excerpt; the comments below describe only what the visible statements do.
191 static bool selectOpenCLDevice()
193 __deviceSelected = true;
195 std::string platform;
196 std::vector<std::string> deviceTypes;
197 std::string deviceName;
// getenv() yields NULL when the variable is unset; the final diagnostic below
// handles that case explicitly.
198 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
201 if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
// A one-character device field is a candidate numeric device ID.
207 if (deviceName.length() == 1)
208 // We limit ID range to 0..9, because we want to write:
209 // - '2500' to mean i5-2500
210 // - '8350' to mean AMD FX-8350
211 // - '650' to mean GeForce 650
212 // To extend ID range change condition to '> 0'
// Every character has to be a digit for the field to count as an ID.
215 for (size_t i = 0; i < deviceName.length(); i++)
217 if (!isdigit(deviceName[i]))
225 deviceID = atoi(deviceName.c_str());
226 CV_Assert(deviceID >= 0);
// Optional platform filter: look for a platform whose name contains the requested
// text (substring match).
230 const PlatformInfo* platformInfo = NULL;
231 if (platform.length() > 0)
233 PlatformsInfo platforms;
234 getOpenCLPlatforms(platforms);
235 for (size_t i = 0; i < platforms.size(); i++)
237 if (platforms[i]->platformName.find(platform) != std::string::npos)
239 platformInfo = platforms[i];
243 if (platformInfo == NULL)
245 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
// No device types requested: fall back to defaults. Both the GPU/CPU pair and "ALL"
// appear as defaults; the condition choosing between them is not visible here.
250 if (deviceTypes.size() == 0)
254 deviceTypes.push_back("GPU");
255 deviceTypes.push_back("CPU");
259 deviceTypes.push_back("ALL");
// Try the requested device types in the order given, mapping each name to its
// CVCL_DEVICE_TYPE_* constant.
262 for (size_t t = 0; t < deviceTypes.size(); t++)
265 if (deviceTypes[t] == "GPU")
267 deviceType = CVCL_DEVICE_TYPE_GPU;
269 else if (deviceTypes[t] == "CPU")
271 deviceType = CVCL_DEVICE_TYPE_CPU;
273 else if (deviceTypes[t] == "ACCELERATOR")
275 deviceType = CVCL_DEVICE_TYPE_ACCELERATOR;
277 else if (deviceTypes[t] == "ALL")
279 deviceType = CVCL_DEVICE_TYPE_ALL;
283 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
// Candidate devices of this type, restricted to platformInfo when one was requested.
288 getOpenCLDevices(devices, deviceType, platformInfo);
// With a numeric ID only index deviceID is examined (and only if it is in range);
// otherwise every candidate is scanned.
290 for (size_t i = (isID ? deviceID : 0);
291 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
// Accept by ID, or by substring match on the device name.
294 if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos)
296 // check for OpenCL 1.1
297 if (devices[i]->deviceVersionMajor < 1 ||
298 (devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1))
300 std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName
301 << "(" << devices[i]->platform->platformName << ")" << std::endl;
302 continue; // unsupported version of device, skip it
// Make this device the current one.
306 setDevice(devices[i]);
310 std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName
311 << "(" << devices[i]->platform->platformName << ")" << std::endl;
// Final diagnostic: echo the raw and parsed configuration so the user can correct it.
319 std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
320 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
321 << " Device types: ";
322 for (size_t t = 0; t < deviceTypes.size(); t++)
324 std::cerr << deviceTypes[t] << " ";
326 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
// Set to true by initializeOpenCLDevices(); checked by Context::getContext().
330 static bool __initialized = false;
// Enumerates the OpenCL platforms and their devices and records their properties in
// global_platforms / global_devices.
// Returns the number of devices recorded. The cv::Exception handler returns 0
// ("OpenCL not found"); the extent of the guarded region is not visible in this excerpt.
// Must be called at most once (asserted below).
331 static int initializeOpenCLDevices()
333 using namespace cl_utils;
335 assert(!__initialized);
336 __initialized = true;
338 assert(global_devices.size() == 0);
340 std::vector<cl_platform_id> platforms;
343 openCLSafeCall(getPlatforms(platforms));
345 catch (cv::Exception&)
347 return 0; // OpenCL not found
// Sized once here and not resized afterwards in this function, so addresses of its
// elements taken below stay valid.
350 global_platforms.resize(platforms.size());
352 for (size_t i = 0; i < platforms.size(); ++i)
354 PlatformInfoImpl& platformInfo = global_platforms[i];
// The public platform id is the index into global_platforms.
355 platformInfo.info._id = i;
357 cl_platform_id platform = platforms[i];
359 platformInfo.platform_id = platform;
// Descriptive strings reported by the platform.
360 openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, platformInfo.info.platformProfile));
361 openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VERSION, platformInfo.info.platformVersion));
362 openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_NAME, platformInfo.info.platformName));
363 openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR, platformInfo.info.platformVendor));
364 openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, platformInfo.info.platformExtensons));
// The boolean result of the parse is ignored here.
366 parseOpenCLVersion(platformInfo.info.platformVersion,
367 platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor);
// A platform without devices (CL_DEVICE_NOT_FOUND) is tolerated; any other status
// goes through openCLVerifyCall.
369 std::vector<cl_device_id> devices;
370 cl_int status = getDevices(platform, CL_DEVICE_TYPE_ALL, devices);
371 if(status != CL_DEVICE_NOT_FOUND)
372 openCLVerifyCall(status);
374 if(devices.size() > 0)
// Append one slot per device to the global table; baseIndx is the first new slot.
376 int baseIndx = global_devices.size();
377 global_devices.resize(baseIndx + devices.size());
378 platformInfo.deviceIDs.resize(devices.size());
379 platformInfo.info.devices.resize(devices.size());
381 for(size_t j = 0; j < devices.size(); ++j)
383 cl_device_id device = devices[j];
385 DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
// The public device id is the index into global_devices.
386 deviceInfo.info._id = baseIndx + j;
387 deviceInfo.platform_id = platform;
388 deviceInfo.device_id = device;
// Link device and platform records in both directions (pointer one way, index the other).
390 deviceInfo.info.platform = &platformInfo.info;
391 platformInfo.deviceIDs[j] = deviceInfo.info._id;
393 cl_device_type type = cl_device_type(-1);
394 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_TYPE, type));
395 deviceInfo.info.deviceType = DeviceType(type);
397 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_PROFILE, deviceInfo.info.deviceProfile));
398 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VERSION, deviceInfo.info.deviceVersion));
399 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_NAME, deviceInfo.info.deviceName));
400 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR, deviceInfo.info.deviceVendor));
401 cl_uint vendorID = 0;
402 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR_ID, vendorID));
403 deviceInfo.info.deviceVendorId = vendorID;
404 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DRIVER_VERSION, deviceInfo.info.deviceDriverVersion));
405 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, deviceInfo.info.deviceExtensions));
407 parseOpenCLVersion(deviceInfo.info.deviceVersion,
408 deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor);
410 size_t maxWorkGroupSize = 0;
411 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize));
412 deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize;
// Per-dimension work-item limits: query the dimension count first, then the array.
// NOTE(review): &maxWorkItemSizes[0] is evaluated even if maxDimensions is 0, which
// would index an empty vector — confirm the runtime never reports 0 here.
414 cl_uint maxDimensions = 0;
415 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxDimensions));
416 std::vector<size_t> maxWorkItemSizes(maxDimensions);
417 openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
418 (void *)&maxWorkItemSizes[0], 0));
419 deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes;
421 cl_uint maxComputeUnits = 0;
422 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits));
423 deviceInfo.info.maxComputeUnits = maxComputeUnits;
// The two memory sizes are queried as cl_ulong and narrowed to size_t when stored.
425 cl_ulong localMemorySize = 0;
426 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_LOCAL_MEM_SIZE, localMemorySize));
427 deviceInfo.info.localMemorySize = (size_t)localMemorySize;
429 cl_ulong maxMemAllocSize = 0;
430 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize));
431 deviceInfo.info.maxMemAllocSize = (size_t)maxMemAllocSize;
433 cl_bool unifiedMemory = false;
434 openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_HOST_UNIFIED_MEMORY, unifiedMemory));
435 deviceInfo.info.isUnifiedMemory = unifiedMemory != 0;
437 //initialize extra options for compilation. Currently only fp64 is included.
438 //Assume 4KB is enough to store all possible extensions.
// NOTE(review): CL_DEVICE_EXTENSIONS was already read into deviceExtensions a few
// statements above; this second query looks redundant.
439 openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, deviceInfo.info.deviceExtensions));
// Double precision is reported when the extension list mentions cl_khr_fp64; kernels
// are then compiled with -D DOUBLE_SUPPORT.
441 size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64");
442 if(fp64_khr != std::string::npos)
444 deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT";
445 deviceInfo.info.haveDoubleSupport = true;
449 deviceInfo.info.haveDoubleSupport = false;
// Second pass: store the DeviceInfo addresses in each platform record. Written as a
// separate loop over the finished tables; global_devices is resized inside the
// enumeration above, and std::vector::resize may relocate its elements.
455 for (size_t i = 0; i < platforms.size(); ++i)
457 PlatformInfoImpl& platformInfo = global_platforms[i];
458 for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
460 DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
461 platformInfo.info.devices[j] = &deviceInfo.info;
465 return global_devices.size();
// Default constructor: marks the record as unassigned (_id == -1), sets the device
// type to DeviceType(0), zeroes the numeric limits and version numbers, and clears
// the feature flags.
469 DeviceInfo::DeviceInfo()
470 : _id(-1), deviceType(DeviceType(0)),
472 maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), maxMemAllocSize(0),
473 deviceVersionMajor(0), deviceVersionMinor(0),
474 haveDoubleSupport(false), isUnifiedMemory(false),
// Default constructor: the platform version numbers start at 0 (they are filled in by
// initializeOpenCLDevices() from the platform version string).
480 PlatformInfo::PlatformInfo()
482 platformVersionMajor(0), platformVersionMinor(0)
487 //////////////////////////////// OpenCL context ////////////////////////
488 //This is a global singleton class used to represent an OpenCL context.
// Concrete implementation behind the public Context interface: bundles the OpenCL
// device id, context and command queue with the DeviceInfo they were created for.
489 class ContextImpl : public Context
// Device this context is bound to (fixed at construction).
492 const cl_device_id clDeviceID;
// OpenCL context and command queue handles; NULL until ContextImpl::setContext()
// assigns them, released in the destructor.
493 cl_context clContext;
494 cl_command_queue clCmdQueue;
// Reference to the device record; setContext() passes an element of global_devices.
495 const DeviceInfo& deviceInfo;
// Stores the device id and info reference; both OpenCL handles start out as NULL.
498 ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID)
499 : clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo)
// Creates a context for the given device and installs it as the current context.
505 static void setContext(const DeviceInfo* deviceInfo);
// Reports whether the bound device supports the given optional feature.
507 bool supportsFeature(FEATURE_TYPE featureType) const;
// Destroys the current context, if any.
509 static void cleanupContext(void);
// Declared without a definition in this excerpt to prevent copying.
512 ContextImpl(const ContextImpl&); // disabled
513 ContextImpl& operator=(const ContextImpl&); // disabled
// The currently active context; replaced by ContextImpl::setContext() and destroyed
// by ContextImpl::cleanupContext().
516 static ContextImpl* currentContext = NULL;
// Returns the current context, lazily enumerating the OpenCL devices and selecting
// one on first use. Raises Error::OpenCLInitError through CV_Error when no OpenCL
// device is available or none can be selected.
// NOTE(review): currentContext and the two flags are read before the initialization
// mutex is taken — confirm this is safe for concurrent first-time callers.
518 Context* Context::getContext()
520 if (currentContext == NULL)
522 if (!__initialized || !__deviceSelected)
// Serialize the lazy initialization steps.
524 cv::AutoLock lock(getInitializationMutex());
// A return value of 0 means no OpenCL devices were recorded.
527 if (initializeOpenCLDevices() == 0)
529 CV_Error(Error::OpenCLInitError, "OpenCL not available");
532 if (!__deviceSelected)
534 if (!selectOpenCLDevice())
536 CV_Error(Error::OpenCLInitError, "Can't select OpenCL device");
// After the steps above a context must have been installed.
540 CV_Assert(currentContext != NULL);
542 return currentContext;
545 bool Context::supportsFeature(FEATURE_TYPE featureType) const
547 return ((ContextImpl*)this)->supportsFeature(featureType);
550 const DeviceInfo& Context::getDeviceInfo() const
552 return ((ContextImpl*)this)->deviceInfo;
555 const void* Context::getOpenCLContextPtr() const
557 return &(((ContextImpl*)this)->clContext);
560 const void* Context::getOpenCLCommandQueuePtr() const
562 return &(((ContextImpl*)this)->clCmdQueue);
565 const void* Context::getOpenCLDeviceIDPtr() const
567 return &(((ContextImpl*)this)->clDeviceID);
// Answers feature queries from the cached DeviceInfo; an unrecognised feature value
// is reported through CV_Error(CV_StsBadArg).
571 bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
// Double precision: flag set during device enumeration (cl_khr_fp64 extension).
575 case FEATURE_CL_DOUBLE:
576 return deviceInfo.haveDoubleSupport;
// Host-unified memory: flag read from CL_DEVICE_HOST_UNIFIED_MEMORY.
577 case FEATURE_CL_UNIFIED_MEM:
578 return deviceInfo.isUnifiedMemory;
// OpenCL 1.2 or newer, based on the parsed device version.
579 case FEATURE_CL_VER_1_2:
580 return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2);
582 CV_Error(CV_StsBadArg, "Invalid feature type");
// Process-termination flag. DllMain sets it on DLL_PROCESS_DETACH when lpReserved is
// non-NULL (commented there as "called after ExitProcess() call").
587 static bool __termination = false;
// Releases the OpenCL command queue and context held by this object.
// NOTE(review): the conditions guarding the two release calls are not visible in this
// excerpt; per the comment below, release is skipped during process termination.
590 ContextImpl::~ContextImpl()
593 // if process is on termination stage (ExitProcess was called and other threads were terminated)
594 // then disable command queue release because it may cause program hang
600 openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
605 openCLSafeCall(clReleaseContext(clContext));
// Forward declaration; the definition is not part of this excerpt.
613 void clBlasTeardown();
// Destroys the current context while holding currentContextMutex and resets the
// global pointer to NULL. Called from the __Module destructor and from DllMain.
615 void ContextImpl::cleanupContext(void)
620 cv::AutoLock lock(__module.currentContextMutex);
622 delete currentContext;
623 currentContext = NULL;
// Creates an OpenCL context and command queue for the given device and installs them
// as the current context.
// deviceInfo must be the address of a record stored in global_devices (asserted).
626 void ContextImpl::setContext(const DeviceInfo* deviceInfo)
628 CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
// Compare against the device of the current context under the lock.
// NOTE(review): the null check on currentContext and the action taken on a match are
// not visible in this excerpt.
631 cv::AutoLock lock(__module.currentContextMutex);
634 if (currentContext->deviceInfo._id == deviceInfo->_id)
639 DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
640 CV_Assert(deviceInfo == &infoImpl.info);
// Create a context on the device's platform containing just this one device.
643 cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
644 cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
645 openCLVerifyCall(status);
646 // TODO add CL_QUEUE_PROFILING_ENABLE
// NOTE(review): if queue creation fails and openCLVerifyCall does not return, the
// clContext created above is never released — confirm and release it on failure.
647 cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status);
648 openCLVerifyCall(status);
// Wrap the freshly created handles in a new ContextImpl.
650 ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id);
651 ctx->clCmdQueue = clCmdQueue;
652 ctx->clContext = clContext;
// Swap the new context in under the lock; the previous one is kept in `old`.
654 ContextImpl* old = NULL;
656 cv::AutoLock lock(__module.currentContextMutex);
657 old = currentContext;
658 currentContext = ctx;
// Appends a pointer to the public info of every entry in global_platforms to
// `platforms` and returns the resulting size of `platforms`.
// NOTE(review): the condition guarding initializeOpenCLDevices() is not visible in
// this excerpt; that function asserts it runs only once.
666 int getOpenCLPlatforms(PlatformsInfo& platforms)
669 initializeOpenCLDevices();
673 for (size_t id = 0; id < global_platforms.size(); ++id)
675 PlatformInfoImpl& impl = global_platforms[id];
676 platforms.push_back(&impl.info);
679 return platforms.size();
682 int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
685 initializeOpenCLDevices();
691 case CVCL_DEVICE_TYPE_DEFAULT:
692 case CVCL_DEVICE_TYPE_CPU:
693 case CVCL_DEVICE_TYPE_GPU:
694 case CVCL_DEVICE_TYPE_ACCELERATOR:
695 case CVCL_DEVICE_TYPE_ALL:
701 if (platform == NULL)
703 for (size_t id = 0; id < global_devices.size(); ++id)
705 DeviceInfoImpl& deviceInfo = global_devices[id];
706 if (((int)deviceInfo.info.deviceType & deviceType) != 0)
708 devices.push_back(&deviceInfo.info);
714 for (size_t id = 0; id < platform->devices.size(); ++id)
716 const DeviceInfo* deviceInfo = platform->devices[id];
717 if (((int)deviceInfo->deviceType & deviceType) == deviceType)
719 devices.push_back(deviceInfo);
724 return (int)devices.size();
// Makes the given device the current one by creating a context for it.
// Also marks device selection as done, so Context::getContext() will not run the
// automatic selection afterwards.
727 void setDevice(const DeviceInfo* info)
729 if (!__deviceSelected)
730 __deviceSelected = true;
732 ContextImpl::setContext(info);
// Convenience wrapper: queries the feature on the current context. Goes through
// Context::getContext(), so it may trigger the lazy OpenCL initialization.
735 bool supportsFeature(FEATURE_TYPE featureType)
737 return Context::getContext()->supportsFeature(featureType);
// NOTE(review): the function this first comment belongs to is not visible in this
// excerpt; it records that device initialization now happens in Context::getContext().
742 /* moved to Context::getContext(): initializeOpenCLDevices(); */
// Destructor of the module-wide state object: cleans up the current context, except
// in Windows DLL builds, where the preprocessor branch is left empty and refers to
// DllMain instead.
745 __Module::~__Module()
747 #if defined(WIN32) && defined(CVAPI_EXPORTS)
748 // nothing, see DllMain
750 ContextImpl::cleanupContext();
// Windows DLL builds only.
758 #if defined(WIN32) && defined(CVAPI_EXPORTS)
// DLL entry point. On DLL_PROCESS_DETACH it destroys the current OpenCL context; when
// lpReserved is non-NULL it first sets __termination, which the ContextImpl
// destructor's comment describes as disabling the command queue release.
// NOTE(review): the end of this function is not visible in this excerpt.
761 BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
763 if (fdwReason == DLL_PROCESS_DETACH)
765 if (lpReserved != NULL) // called after ExitProcess() call
766 cv::ocl::__termination = true;
767 cv::ocl::ContextImpl::cleanupContext();