1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
19 // Guoping Long, longguoping@gmail.com
20 // Niko Li, newlife20080214@gmail.com
21 // Yao Wang, bitwangyaoyao@gmail.com
22 // Redistribution and use in source and binary forms, with or without modification,
23 // are permitted provided that the following conditions are met:
25 // * Redistribution's of source code must retain the above copyright notice,
26 // this list of conditions and the following disclaimer.
28 // * Redistribution's in binary form must reproduce the above copyright notice,
29 // this list of conditions and the following disclaimer in the documentation
30 // and/or other materials provided with the distribution.
32 // * The name of the copyright holders may not be used to endorse or promote products
33 // derived from this software without specific prior written permission.
35 // This software is provided by the copyright holders and contributors "as is" and
36 // any express or implied warranties, including, but not limited to, the implied
37 // warranties of merchantability and fitness for a particular purpose are disclaimed.
38 // In no event shall the Intel Corporation or contributors be liable for any direct,
39 // indirect, incidental, special, exemplary, or consequential damages
40 // (including, but not limited to, procurement of substitute goods or services;
41 // loss of use, data, or profits; or business interruption) however caused
42 // and on any theory of liability, whether in contract, strict liability,
43 // or tort (including negligence or otherwise) arising in any way out of
44 // the use of this software, even if advised of the possibility of such damage.
48 #include "precomp.hpp"
51 #include "cl_programcache.hpp"
53 // workaround for OpenCL C++ bindings
54 #if defined(HAVE_OPENCL12)
55 #include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp"
56 #elif defined(HAVE_OPENCL11)
57 #include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp"
59 #error Invalid OpenCL configuration
62 #if defined _MSC_VER && _MSC_VER >= 1200
63 #pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610)
65 #undef __CL_ENABLE_EXCEPTIONS
// Per-platform bookkeeping record: the raw OpenCL platform handle plus the
// identifiers of the devices that belong to it.
// NOTE(review): the embedded line numbers skip (71, 73, 75), so this excerpt
// omits lines - for instance the `info` member that other code reads as
// `platformInfo.info`, and the braces. Confirm against the complete file.
71 struct PlatformInfoImpl
73 cl_platform_id platform_id;
// Holds DeviceInfo::_id values; initializeOpenCLDevices() later uses them as
// indices into global_devices.
75 std::vector<int> deviceIDs;
// NOTE(review): the `struct` header for these members is not visible in this
// excerpt. Presumably this is DeviceInfoImpl, whose platform_id / device_id
// fields are assigned in initializeOpenCLDevices() - confirm against the full file.
// Raw OpenCL handles of the owning platform and of the device itself.
87 cl_platform_id platform_id;
88 cl_device_id device_id;
// Constructor initializer list: both handles start out as NULL.
93 : platform_id(NULL), device_id(NULL)
// File-local registries filled by initializeOpenCLDevices().
// PlatformInfo::_id is the index of a platform inside global_platforms and
// DeviceInfo::_id is the index of a device inside global_devices.
98 static std::vector<PlatformInfoImpl> global_platforms;
99 static std::vector<DeviceInfoImpl> global_devices;
// Extracts the numeric "<major>.<minor>" pair from an OpenCL version string
// such as "OpenCL 1.2 AMD-APP (1084.4)" or "OpenCL C 1.1 ".
//
// The version is taken from the first space-separated token that starts with
// a digit. Text before it ("OpenCL", "OpenCL C") and any vendor-specific text
// after it are ignored; the trailing text is optional.
//
// @param versionStr  string reported by CL_PLATFORM_VERSION / CL_DEVICE_VERSION
// @param major       receives the major version, or 0 on failure
// @param minor       receives the minor version, or 0 on failure
// @return true when a "<digits>.<digits>" token was located, false otherwise
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
{
    // Outputs are always well defined, even when parsing fails.
    major = 0;
    minor = 0;

    // Walk the spaces until one is directly followed by a digit: that space
    // precedes the version token.
    size_t p0 = versionStr.find(' ');
    while (p0 != std::string::npos && p0 + 1 < versionStr.length())
    {
        // isdigit() requires a value representable as unsigned char
        if (isdigit(static_cast<unsigned char>(versionStr[p0 + 1])))
            break;
        p0 = versionStr.find(' ', p0 + 1);
    }
    if (p0 == std::string::npos)
        return false;

    const size_t p1 = versionStr.find('.', p0);
    if (p1 == std::string::npos)
        return false;

    // The version token normally ends at the next space; when nothing follows
    // it (for example "OpenCL 1.2") it simply runs to the end of the string.
    size_t p2 = versionStr.find(' ', p1);
    if (p2 == std::string::npos)
        p2 = versionStr.length();

    const std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
    const std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
    major = atoi(majorStr.c_str());
    minor = atoi(minorStr.c_str());
    return true;
}
// Splits `s` at every occurrence of `delim` and appends the pieces to `elems`
// (existing contents of `elems` are kept).
//
// Follows std::getline semantics: an empty input yields no pieces, adjacent
// delimiters yield an empty piece, and a single trailing delimiter does not
// produce a trailing empty piece.
static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
}

// Convenience overload: returns the pieces of `s` as a new vector.
static std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}
144 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
146 // Sample: AMD:GPU:Tahiti
147 // Sample: :GPU|CPU: = '' = ':' = '::'
148 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
149 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
151 std::string deviceTypesStr;
152 size_t p0 = configurationStr.find(':');
153 if (p0 != std::string::npos)
155 size_t p1 = configurationStr.find(':', p0 + 1);
156 if (p1 != std::string::npos)
158 size_t p2 = configurationStr.find(':', p1 + 1);
159 if (p2 != std::string::npos)
161 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
166 // assume platform + device types + device name/id
167 platform = configurationStr.substr(0, p0);
168 deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1));
169 deviceNameOrID = configurationStr.substr(p1 + 1, configurationStr.length() - (p1 + 1));
174 // assume platform + device types
175 platform = configurationStr.substr(0, p0);
176 deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1));
181 // assume only platform
182 platform = configurationStr;
184 deviceTypes = split(deviceTypesStr, '|');
// Set once a device choice has been made or attempted: selectOpenCLDevice()
// sets it on entry and setDevice() sets it as well. Context::getContext()
// reads it to decide whether automatic selection is still required.
188 static bool __deviceSelected = false;
// Chooses an OpenCL device according to the OPENCV_OPENCL_DEVICE environment
// variable (parsed by parseOpenCLDeviceConfiguration) and activates it through
// setDevice(). Diagnostics are written to std::cerr.
// NOTE(review): the embedded line numbers skip throughout this function, so
// braces and several statements (for example the declarations of isID,
// deviceID, deviceType and devices, and every return statement) are not
// visible in this excerpt - confirm against the complete file.
189 static bool selectOpenCLDevice()
// The flag is raised before any work is done, so Context::getContext() will
// not call this function again even when selection fails.
191 __deviceSelected = true;
193 std::string platform;
194 std::vector<std::string> deviceTypes;
195 std::string deviceName;
196 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
// NOTE(review): constructing std::string from a null pointer is undefined;
// presumably a null check on `configuration` sits on the lines missing between
// 196 and 199 - confirm.
199 if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
// A one-character device name made only of digits is converted to a numeric
// device index (deviceID) instead of being matched as a name.
205 if (deviceName.length() == 1)
206 // We limit ID range to 0..9, because we want to write:
207 // - '2500' to mean i5-2500
208 // - '8350' to mean AMD FX-8350
209 // - '650' to mean GeForce 650
210 // To extend ID range change condition to '> 0'
213 for (size_t i = 0; i < deviceName.length(); i++)
// NOTE(review): isdigit() is given a plain char here; a negative value would be
// outside its defined domain. Consider casting to unsigned char.
215 if (!isdigit(deviceName[i]))
223 deviceID = atoi(deviceName.c_str());
224 CV_Assert(deviceID >= 0);
// Optional platform filter: the first platform whose name contains the
// requested text (substring match) is used.
228 const PlatformInfo* platformInfo = NULL;
229 if (platform.length() > 0)
231 PlatformsInfo platforms;
232 getOpenCLPlatforms(platforms);
233 for (size_t i = 0; i < platforms.size(); i++)
235 if (platforms[i]->platformName.find(platform) != std::string::npos)
237 platformInfo = platforms[i];
241 if (platformInfo == NULL)
243 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
// Defaults used when the configuration names no device type.
// NOTE(review): the condition choosing between GPU+CPU and ALL is not visible
// here (lines 249-251 and 254-256 are missing) - confirm.
248 if (deviceTypes.size() == 0)
252 deviceTypes.push_back("GPU");
253 deviceTypes.push_back("CPU");
257 deviceTypes.push_back("ALL");
// Device types are tried in the listed order; each textual type is mapped to
// its CVCL_DEVICE_TYPE_* constant and anything else is reported as an error.
260 for (size_t t = 0; t < deviceTypes.size(); t++)
263 if (deviceTypes[t] == "GPU")
265 deviceType = CVCL_DEVICE_TYPE_GPU;
267 else if (deviceTypes[t] == "CPU")
269 deviceType = CVCL_DEVICE_TYPE_CPU;
271 else if (deviceTypes[t] == "ACCELERATOR")
273 deviceType = CVCL_DEVICE_TYPE_ACCELERATOR;
275 else if (deviceTypes[t] == "ALL")
277 deviceType = CVCL_DEVICE_TYPE_ALL;
281 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
286 getOpenCLDevices(devices, deviceType, platformInfo);
// With a numeric ID only the entry at index deviceID is examined (and only if
// it is within range); otherwise every returned device is scanned for a name
// containing deviceName.
288 for (size_t i = (isID ? deviceID : 0);
289 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
292 if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos)
294 // check for OpenCL 1.1
295 if (devices[i]->deviceVersionMajor < 1 ||
296 (devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1))
298 std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName
299 << "(" << devices[i]->platform->platformName << ")" << std::endl;
300 continue; // unsupported version of device, skip it
// Activate the first acceptable device.
// NOTE(review): the message below is presumably emitted from an exception
// handler around setDevice(); the handler itself is not visible - confirm.
304 setDevice(devices[i]);
308 std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName
309 << "(" << devices[i]->platform->platformName << ")" << std::endl;
// Failure report: echoes the requested configuration (platform, device types,
// device name) so the user can correct the environment variable.
317 std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
318 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
319 << " Device types: ";
320 for (size_t t = 0; t < deviceTypes.size(); t++)
322 std::cerr << deviceTypes[t] << " ";
324 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
// Mutex taken by Context::getContext() around lazy initialization.
328 static cv::Mutex __initializedMutex;
// True once initializeOpenCLDevices() has been entered (set on entry, before
// any OpenCL call is made).
329 static bool __initialized = false;
// Enumerates every OpenCL platform and its devices into global_platforms and
// global_devices and returns the total number of devices found. Returns 0 when
// querying the platform list raises cv::Exception.
// Must be entered at most once: it asserts that __initialized is still false
// and that global_devices is empty.
// NOTE(review): the embedded line numbers skip throughout this function, so
// braces and keywords such as try / else are not visible in this excerpt -
// confirm against the complete file.
330 static int initializeOpenCLDevices()
332 assert(!__initialized);
333 __initialized = true;
335 assert(global_devices.size() == 0);
337 std::vector<cl::Platform> platforms;
340 openCLSafeCall(cl::Platform::get(&platforms));
342 catch (cv::Exception& e)
344 return 0; // OpenCL not found
// global_platforms is sized once here, so pointers to its elements taken
// below are not invalidated by later growth of global_devices.
347 global_platforms.resize(platforms.size());
349 for (size_t i = 0; i < platforms.size(); ++i)
351 PlatformInfoImpl& platformInfo = global_platforms[i];
352 platformInfo.info._id = i;
354 cl::Platform& platform = platforms[i];
// Copy the descriptive platform strings into the public PlatformInfo record.
356 platformInfo.platform_id = platform();
357 openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile));
358 openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion));
359 openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName));
360 openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor));
361 openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons));
// The boolean result is ignored; the version fields are simply whatever the
// parser stored.
363 parseOpenCLVersion(platformInfo.info.platformVersion,
364 platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor);
// A platform that reports CL_DEVICE_NOT_FOUND is accepted (it just contributes
// no devices); any other status goes through openCLVerifyCall.
366 std::vector<cl::Device> devices;
367 cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
368 if(status != CL_DEVICE_NOT_FOUND)
369 openCLVerifyCall(status);
371 if(devices.size() > 0)
// NOTE(review): size_t is narrowed to int here.
373 int baseIndx = global_devices.size();
374 global_devices.resize(baseIndx + devices.size());
375 platformInfo.deviceIDs.resize(devices.size());
376 platformInfo.info.devices.resize(devices.size());
378 for(size_t j = 0; j < devices.size(); ++j)
380 cl::Device& device = devices[j];
// The device id is its index inside global_devices.
382 DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
383 deviceInfo.info._id = baseIndx + j;
384 deviceInfo.platform_id = platform();
385 deviceInfo.device_id = device();
// Back-pointer to the owning platform, and registration of this device id with
// that platform.
387 deviceInfo.info.platform = &platformInfo.info;
388 platformInfo.deviceIDs[j] = deviceInfo.info._id;
390 cl_device_type type = cl_device_type(-1);
391 openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type));
392 deviceInfo.info.deviceType = DeviceType(type);
394 openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile));
395 openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion));
396 openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName));
397 openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor));
398 cl_uint vendorID = 0;
399 openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID));
400 deviceInfo.info.deviceVendorId = vendorID;
401 openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion));
402 openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
404 parseOpenCLVersion(deviceInfo.info.deviceVersion,
405 deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor);
407 size_t maxWorkGroupSize = 0;
408 openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize));
409 deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize;
// The per-dimension limits are read with the raw C API into a vector sized by
// the reported dimension count.
// NOTE(review): if maxDimensions were ever 0, taking the address of element 0
// of the empty vector would be undefined - confirm this cannot happen.
411 cl_uint maxDimensions = 0;
412 openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions));
413 std::vector<size_t> maxWorkItemSizes(maxDimensions);
414 openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
415 (void *)&maxWorkItemSizes[0], 0));
416 deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes;
418 cl_uint maxComputeUnits = 0;
419 openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits));
420 deviceInfo.info.maxComputeUnits = maxComputeUnits;
422 cl_ulong localMemorySize = 0;
423 openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize));
424 deviceInfo.info.localMemorySize = (size_t)localMemorySize;
427 cl_bool unifiedMemory = false;
428 openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory));
429 deviceInfo.info.isUnifiedMemory = unifiedMemory != 0;
431 //initialize extra options for compilation. Currently only fp64 is included.
432 //Assume 4KB is enough to store all possible extensions.
// NOTE(review): CL_DEVICE_EXTENSIONS was already fetched into the same field at
// line 402, so this query looks redundant; the 4KB remark above does not match
// any buffer visible in this excerpt.
433 openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
// Double precision support is detected by a substring search for cl_khr_fp64
// in the extension list; when present a preprocessor define is appended to the
// extra compilation options.
435 size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64");
436 if(fp64_khr != std::string::npos)
438 deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT";
439 deviceInfo.info.haveDoubleSupport = true;
443 deviceInfo.info.haveDoubleSupport = false;
// Second pass: now that global_devices has its final size, store pointers to
// the DeviceInfo records in each platform (the resize at line 374 can relocate
// elements, so pointers are only taken afterwards).
449 for (size_t i = 0; i < platforms.size(); ++i)
451 PlatformInfoImpl& platformInfo = global_platforms[i];
452 for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
454 DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
455 platformInfo.info.devices[j] = &deviceInfo.info;
// Total device count across all platforms (size_t converted to the int return type).
459 return global_devices.size();
// Default constructor: id -1, device type 0, zeroed size limits and version
// numbers, and both feature flags false.
// NOTE(review): the initializer list is only partly visible in this excerpt
// (embedded line 465 and everything after 468 are missing) - confirm the
// remaining members against the complete file.
463 DeviceInfo::DeviceInfo()
464 : _id(-1), deviceType(DeviceType(0)),
466 maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0),
467 deviceVersionMajor(0), deviceVersionMinor(0),
468 haveDoubleSupport(false), isUnifiedMemory(false),
// Default constructor: the parsed platform version starts at 0.0.
// NOTE(review): the first line of the initializer list (embedded line 475) and
// the constructor body are not visible in this excerpt - confirm.
474 PlatformInfo::PlatformInfo()
476 platformVersionMajor(0), platformVersionMinor(0)
481 //////////////////////////////// OpenCL context ////////////////////////
482 //This is a global singleton class used to represent an OpenCL context.
// Concrete Context: bundles the device handle, the cl_context and the command
// queue created for that device, plus a reference to the device description.
// NOTE(review): access specifiers, braces and the destructor declaration are
// not visible in this excerpt - confirm against the complete file.
483 class ContextImpl : public Context
486 const cl_device_id clDeviceID;
487 cl_context clContext;
488 cl_command_queue clCmdQueue;
// Reference, not a copy: setContext() binds it to an element of global_devices.
489 const DeviceInfo& deviceInfo;
// The constructor only records the device; clContext and clCmdQueue start as
// NULL and are assigned afterwards by setContext().
492 ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID)
493 : clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo)
// Creates a context for the given device and installs it as the current one.
499 static void setContext(const DeviceInfo* deviceInfo);
// Feature query answered from deviceInfo.
501 bool supportsFeature(FEATURE_TYPE featureType) const;
// Deletes the current context; invoked from the __Module destructor.
503 static void cleanupContext(void);
// Taken by cleanupContext() and setContext() around accesses to currentContext.
506 static cv::Mutex currentContextMutex;
// The active context; NULL until setContext() installs one.
507 static ContextImpl* currentContext = NULL;
// Returns the active context, initializing OpenCL lazily on first use: with
// __initializedMutex held it runs initializeOpenCLDevices() and, when no device
// has been chosen yet, selectOpenCLDevice(). CV_Error with CV_OpenCLInitError is
// raised when OpenCL is unavailable or no device can be selected.
// NOTE(review): the checks at lines 511 and 513 come before the lock is taken
// at 515 and read plain (non-atomic) variables - verify the thread-safety
// expectations of callers.
// NOTE(review): braces and some statements (embedded lines 516-517, for
// example) are not visible in this excerpt, so the exact nesting should be
// confirmed against the complete file.
509 Context* Context::getContext()
511 if (currentContext == NULL)
513 if (!__initialized || !__deviceSelected)
515 cv::AutoLock lock(__initializedMutex);
518 if (initializeOpenCLDevices() == 0)
520 CV_Error(CV_OpenCLInitError, "OpenCL not available");
523 if (!__deviceSelected)
525 if (!selectOpenCLDevice())
527 CV_Error(CV_OpenCLInitError, "Can't select OpenCL device");
// After initialization and selection a context must exist.
531 CV_Assert(currentContext != NULL);
533 return currentContext;
536 bool Context::supportsFeature(FEATURE_TYPE featureType) const
538 return ((ContextImpl*)this)->supportsFeature(featureType);
541 const DeviceInfo& Context::getDeviceInfo() const
543 return ((ContextImpl*)this)->deviceInfo;
546 const void* Context::getOpenCLContextPtr() const
548 return &(((ContextImpl*)this)->clContext);
551 const void* Context::getOpenCLCommandQueuePtr() const
553 return &(((ContextImpl*)this)->clCmdQueue);
556 const void* Context::getOpenCLDeviceIDPtr() const
558 return &(((ContextImpl*)this)->clDeviceID);
562 bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
566 case FEATURE_CL_DOUBLE:
567 return deviceInfo.haveDoubleSupport;
568 case FEATURE_CL_UNIFIED_MEM:
569 return deviceInfo.isUnifiedMemory;
570 case FEATURE_CL_VER_1_2:
571 return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2);
573 CV_Error(CV_StsBadArg, "Invalid feature type");
// Set to true by DllMain when the DLL is detached after ExitProcess() (see the
// end of this file); presumably consulted by the destructor below - the check
// itself is not visible in this excerpt.
578 static bool __termination = false;
// Releases the OpenCL command queue and the OpenCL context held by this object.
// NOTE(review): the conditions guarding the two release calls (including the
// termination rule described in the comment below) are on lines missing from
// this excerpt - confirm against the complete file.
// NOTE(review): openCLSafeCall appears to raise cv::Exception on failure (see
// the catch in initializeOpenCLDevices); raising from a destructor is
// hazardous - confirm the intended behavior.
581 ContextImpl::~ContextImpl()
584 // if process is on termination stage (ExitProcess was called and other threads were terminated)
585 // then disable command queue release because it may cause program hang
591 openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
596 openCLSafeCall(clReleaseContext(clContext));
// Forward declaration of a teardown routine defined elsewhere.
// NOTE(review): no call to it is visible in this excerpt; presumably it is
// invoked on the lines missing from cleanupContext() below - confirm.
604 void clBlasTeardown();
// Destroys the current context with currentContextMutex held and resets the
// pointer to NULL, so a later setContext() starts from a clean state.
// Deleting a NULL pointer is a no-op, so this is harmless when no context exists.
606 void ContextImpl::cleanupContext(void)
611 cv::AutoLock lock(currentContextMutex);
613 delete currentContext;
614 currentContext = NULL;
// Creates an OpenCL context and command queue for the given device and
// installs them as the current context.
// deviceInfo must point at the `info` member of an element of global_devices;
// both the id range and the pointer identity are asserted.
// NOTE(review): the embedded line numbers skip here, so braces, the
// declaration of `status`, any null check around line 625 and the disposal of
// the previous context after line 649 are not visible - confirm against the
// complete file.
617 void ContextImpl::setContext(const DeviceInfo* deviceInfo)
619 CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
// First critical section: compares the requested device id with the id of the
// context that is already current.
// NOTE(review): currentContext may be NULL at this point; presumably a guard
// precedes the dereference below on a missing line - confirm.
622 cv::AutoLock lock(currentContextMutex);
625 if (currentContext->deviceInfo._id == deviceInfo->_id)
630 DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
631 CV_Assert(deviceInfo == &infoImpl.info);
// The context is created for exactly one device on the platform that owns it;
// the property list is terminated by 0.
634 cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
635 cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
636 openCLVerifyCall(status);
637 // TODO add CL_QUEUE_PROFILING_ENABLE
// Command queue created with no properties (third argument 0).
// NOTE(review): if this verification fails after clCreateContext succeeded,
// no release of clContext is visible here - check for a leak on that path.
638 cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status);
639 openCLVerifyCall(status);
641 ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id);
642 ctx->clCmdQueue = clCmdQueue;
643 ctx->clContext = clContext;
// Second critical section: swap in the new context and remember the previous one.
645 ContextImpl* old = NULL;
647 cv::AutoLock lock(currentContextMutex);
648 old = currentContext;
649 currentContext = ctx;
// Appends a pointer to the PlatformInfo of every known platform to `platforms`
// and returns the resulting size of that vector.
// NOTE(review): initializeOpenCLDevices() asserts that it has not run before,
// so presumably a guard on __initialized precedes the call below on a missing
// line; whether `platforms` is cleared first is also not visible - confirm.
// NOTE(review): the size_t returned by size() is narrowed to int without a
// cast, unlike getOpenCLDevices().
657 int getOpenCLPlatforms(PlatformsInfo& platforms)
660 initializeOpenCLDevices();
664 for (size_t id = 0; id < global_platforms.size(); ++id)
666 PlatformInfoImpl& impl = global_platforms[id];
667 platforms.push_back(&impl.info);
670 return platforms.size();
// Appends to `devices` the known devices that match `deviceType`, optionally
// restricted to one platform, and returns the resulting size of `devices`.
// @param devices     output list of pointers to DeviceInfo records
// @param deviceType  one of the CVCL_DEVICE_TYPE_* values listed below
// @param platform    NULL to search every device, otherwise only the devices
//                    of that platform
// NOTE(review): the switch header, its break / default handling, the guard in
// front of initializeOpenCLDevices() and any clearing of `devices` are on
// lines missing from this excerpt - confirm against the complete file.
673 int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
676 initializeOpenCLDevices();
// Accepted device type values.
682 case CVCL_DEVICE_TYPE_DEFAULT:
683 case CVCL_DEVICE_TYPE_CPU:
684 case CVCL_DEVICE_TYPE_GPU:
685 case CVCL_DEVICE_TYPE_ACCELERATOR:
686 case CVCL_DEVICE_TYPE_ALL:
// No platform given: scan the global device list and keep a device when it
// shares at least one bit with the requested type mask.
692 if (platform == NULL)
694 for (size_t id = 0; id < global_devices.size(); ++id)
696 DeviceInfoImpl& deviceInfo = global_devices[id];
697 if (((int)deviceInfo.info.deviceType & deviceType) != 0)
699 devices.push_back(&deviceInfo.info);
// Platform given: scan only that platform.
// NOTE(review): this branch keeps a device only when every bit of deviceType
// is set in the device type (== deviceType), whereas the branch above uses
// != 0. For a multi-bit mask (presumably CVCL_DEVICE_TYPE_ALL) this form can
// reject every device - confirm which comparison is intended.
705 for (size_t id = 0; id < platform->devices.size(); ++id)
707 const DeviceInfo* deviceInfo = platform->devices[id];
708 if (((int)deviceInfo->deviceType & deviceType) == deviceType)
710 devices.push_back(deviceInfo);
715 return (int)devices.size();
718 void setDevice(const DeviceInfo* info)
720 if (!__deviceSelected)
721 __deviceSelected = true;
723 ContextImpl::setContext(info);
726 bool supportsFeature(FEATURE_TYPE featureType)
728 return Context::getContext()->supportsFeature(featureType);
// Lifetime hooks of the module-level helper object declared below:
// construction does nothing (device initialization now happens lazily in
// Context::getContext()), destruction tears down the current context.
// NOTE(review): the enclosing struct header and braces are not visible in this
// excerpt - confirm against the complete file.
733 __Module() { /* moved to Context::getContext(): initializeOpenCLDevices(); */ }
734 ~__Module() { ContextImpl::cleanupContext(); }
// Single static instance; its destructor triggers the cleanup above.
736 static __Module __module;
743 #if defined(WIN32) && defined(CVAPI_EXPORTS)
746 BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
748 if (fdwReason == DLL_PROCESS_DETACH)
750 if (lpReserved != NULL) // called after ExitProcess() call
751 cv::ocl::__termination = true;