1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
122 #include "umatrix.hpp"
124 namespace cv { namespace ocl {
126 #define IMPLEMENT_REFCOUNTABLE() \
127 void addref() { CV_XADD(&refcount, 1); } \
128 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
136 return opencl_allocator_stats;
140 static bool isRaiseError()
142 static bool initialized = false;
143 static bool value = false;
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
153 #if CV_OPENCL_TRACE_CHECK
155 void traceOpenCLCheck(cl_int status, const char* message)
157 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
164 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
165 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
167 #define CV_OCL_CHECK_RESULT(check_result, msg) \
169 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
170 if (check_result != CL_SUCCESS) \
172 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
173 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
174 CV_Error(Error::OpenCLApiCallError, error_msg); \
178 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
180 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
183 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
184 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
185 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
189 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
190 if (check_result != CL_SUCCESS && isRaiseError()) \
192 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
193 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
194 CV_Error(Error::OpenCLApiCallError, error_msg); \
197 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
198 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
202 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
203 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
204 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
205 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
207 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
208 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
211 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
212 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
220 static const String getBuildExtraOptions()
222 static String param_buildExtraOptions;
223 static bool initialized = false;
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
231 return param_buildExtraOptions;
234 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
235 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
240 UMat2D(const UMat& m)
242 offset = (int)m.offset;
255 UMat3D(const UMat& m)
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
275 static uint64 table[256];
276 static bool initialized = false;
280 for( int i = 0; i < 256; i++ )
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
308 OpenCLBinaryCacheConfigurator()
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
325 if (cache_path_.empty())
327 if (cache_path_ == "disabled")
329 if (!utils::fs::createDirectories(cache_path_))
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
359 catch (const cv::Exception& e)
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
370 if (CV_OPENCL_CACHE_WRITE)
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
381 catch (const cv::Exception& e)
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
387 if (!cache_path_.empty())
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
404 cache_lock_filename_.clear();
405 cache_lock_.release();
408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
411 if (cache_path_.empty())
412 return std::string();
414 AutoLock lock(mutex_prepared_contexts_);
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
438 catch (const cv::Exception& e)
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
463 if (!remove_entries.empty())
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
476 for (size_t i = 0; i < remove_entries.size(); i++)
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
485 catch (const cv::Exception& e)
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
507 class BinaryProgramFile
509 enum { MAX_ENTRIES = 64 };
511 typedef unsigned int uint32_t;
513 struct CV_DECL_ALIGNED(4) FileHeader
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
519 struct CV_DECL_ALIGNED(4) FileTable
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
525 struct CV_DECL_ALIGNED(4) FileEntry
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
540 uint32_t entryOffsets[MAX_ENTRIES];
542 uint32_t getHash(const std::string& options)
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
548 inline size_t getFileSize()
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
556 inline uint32_t readUInt32()
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
563 inline void writeUInt32(const uint32_t value)
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
570 inline void seekReadAbsolute(size_t pos)
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
575 inline void seekReadRelative(size_t pos)
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
581 inline void seekWriteAbsolute(size_t pos)
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
608 bool isValid = false;
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
630 catch (const cv::Exception& e)
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
649 bool read(const std::string& key, std::vector<char>& buf)
654 size_t fileSize = getFileSize();
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
679 uint32_t entryNum = getHash(key);
681 uint32_t entryOffset = entryOffsets[entryNum];
683 while (entryOffset > 0)
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
692 if (entry.keySize > 0)
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
707 if (entry.nextEntryFileOffset == 0)
709 entryOffset = entry.nextEntryFileOffset;
714 bool write(const std::string& key, std::vector<char>& buf)
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
730 size_t fileSize = getFileSize();
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
744 CV_Assert(!f.fail());
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
769 uint32_t entryNum = getHash(key);
771 uint32_t entryOffset = entryOffsets[entryNum];
773 while (entryOffset > 0)
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
782 if (entry.keySize > 0)
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
794 if (entry.nextEntryFileOffset == 0)
796 entryOffset = entry.nextEntryFileOffset;
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
833 struct OpenCLExecutionContext::Impl
835 ocl::Context context_;
836 int device_; // device index in context
843 void _init_device(cl_device_id deviceID)
846 int ndevices = (int)context_.ndevices();
847 CV_Assert(ndevices > 0);
849 for (int i = 0; i < ndevices; i++)
851 ocl::Device d = context_.device(i);
852 cl_device_id dhandle = (cl_device_id)d.ptr();
853 if (dhandle == deviceID)
860 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
863 void _init_device(const ocl::Device& device)
865 CV_Assert(device.ptr());
866 int ndevices = (int)context_.ndevices();
867 CV_Assert(ndevices > 0);
869 for (int i = 0; i < ndevices; i++)
871 ocl::Device d = context_.device(i);
872 if (d.getImpl() == device.getImpl())
879 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
883 Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
884 : device_(0), useOpenCL_(-1)
886 CV_UNUSED(platformID);
890 context_ = Context::fromHandle(context);
891 _init_device(deviceID);
892 queue_ = Queue(context_, context_.device(device_));
895 Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
896 : device_(0), useOpenCL_(-1)
898 CV_Assert(context.ptr());
899 CV_Assert(device.ptr());
902 _init_device(device);
906 Impl(const ocl::Context& context, const ocl::Device& device)
907 : device_(0), useOpenCL_(-1)
909 CV_Assert(context.ptr());
910 CV_Assert(device.ptr());
913 _init_device(device);
914 queue_ = Queue(context_, context_.device(device_));
917 Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
925 Impl(const Impl& other)
926 : context_(other.context_)
927 , device_(other.device_)
928 , queue_(other.queue_)
934 inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
942 if (!context_.empty() && context_.ndevices() > 0)
944 const Device& d = context_.device(device_);
945 useOpenCL_ = d.available();
948 catch (const cv::Exception&)
953 CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
955 return useOpenCL_ > 0;
958 void setUseOpenCL(bool flag)
966 static const std::shared_ptr<Impl>& getInitializedExecutionContext()
970 CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
972 static bool initialized = false;
973 static std::shared_ptr<Impl> g_primaryExecutionContext;
977 cv::AutoLock lock(getInitializationMutex());
980 CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
983 Context c = ocl::Context::create(std::string());
987 auto& d = c.device(deviceId);
990 auto q = ocl::Queue(c, d);
993 CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
997 g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
998 CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
1003 CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
1008 CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
1011 catch (const std::exception& e)
1013 CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
1017 CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
1022 return g_primaryExecutionContext;
1026 Context& OpenCLExecutionContext::getContext() const
1031 Device& OpenCLExecutionContext::getDevice() const
1034 return p->context_.device(p->device_);
1036 Queue& OpenCLExecutionContext::getQueue() const
1042 bool OpenCLExecutionContext::useOpenCL() const
1045 return p->useOpenCL();
1048 void OpenCLExecutionContext::setUseOpenCL(bool flag)
1051 p->setUseOpenCL(flag);
1055 OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1057 CV_TRACE_FUNCTION();
1058 CoreTLSData& data = getCoreTlsData();
1059 OpenCLExecutionContext& c = data.oclExecutionContext;
1060 if (!data.oclExecutionContextInitialized)
1062 data.oclExecutionContextInitialized = true;
1063 if (c.empty() && haveOpenCL())
1064 c.p = Impl::getInitializedExecutionContext();
1070 OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1072 CV_TRACE_FUNCTION();
1073 CoreTLSData& data = getCoreTlsData();
1074 OpenCLExecutionContext& c = data.oclExecutionContext;
1078 void OpenCLExecutionContext::bind() const
1080 CV_TRACE_FUNCTION();
1082 CoreTLSData& data = getCoreTlsData();
1083 data.oclExecutionContext = *this;
1084 data.oclExecutionContextInitialized = true;
1085 data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL()
1089 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1091 CV_TRACE_FUNCTION();
1093 const Queue q(getContext(), getDevice());
1094 return cloneWithNewQueue(q);
1097 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1099 CV_TRACE_FUNCTION();
1101 CV_Assert(q.ptr() != NULL);
1102 OpenCLExecutionContext c;
1103 c.p = std::make_shared<Impl>(p->context_, p->device_, q);
1108 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1110 CV_TRACE_FUNCTION();
1112 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1114 CV_Assert(!context.empty());
1115 CV_Assert(context.ptr());
1116 CV_Assert(!device.empty());
1117 CV_Assert(device.ptr());
1118 OpenCLExecutionContext ctx;
1119 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
1125 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1127 CV_TRACE_FUNCTION();
1129 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1131 CV_Assert(!context.empty());
1132 CV_Assert(context.ptr());
1133 CV_Assert(!device.empty());
1134 CV_Assert(device.ptr());
1135 OpenCLExecutionContext ctx;
1136 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
1141 void OpenCLExecutionContext::release()
1143 CV_TRACE_FUNCTION();
1148 // true if we have initialized OpenCL subsystem with available platforms
1149 static bool g_isOpenCVActivated = false;
1153 CV_TRACE_FUNCTION();
1154 static bool g_isOpenCLInitialized = false;
1155 static bool g_isOpenCLAvailable = false;
1157 if (!g_isOpenCLInitialized)
1159 CV_TRACE_REGION("Init_OpenCL_Runtime");
1160 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
1163 if (cv::String(envPath) == "disabled")
1165 g_isOpenCLAvailable = false;
1166 g_isOpenCLInitialized = true;
1171 cv::AutoLock lock(getInitializationMutex());
1172 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1176 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1177 g_isOpenCVActivated = n > 0;
1178 CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1182 g_isOpenCLAvailable = false;
1184 g_isOpenCLInitialized = true;
1186 return g_isOpenCLAvailable;
1191 CoreTLSData& data = getCoreTlsData();
1192 if (data.useOpenCL < 0)
1199 auto c = OpenCLExecutionContext::getCurrent();
1200 data.useOpenCL = c.useOpenCL();
1205 CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1208 return data.useOpenCL > 0;
1211 bool isOpenCLActivated()
1213 if (!g_isOpenCVActivated)
1214 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1218 void setUseOpenCL(bool flag)
1220 CV_TRACE_FUNCTION();
1222 CoreTLSData& data = getCoreTlsData();
1223 auto& c = OpenCLExecutionContext::getCurrentRef();
1226 c.setUseOpenCL(flag);
1227 data.useOpenCL = c.useOpenCL();
1234 data.useOpenCL = -1; // enabled by default (if context is not initialized)
1240 #ifdef HAVE_CLAMDBLAS
1245 static AmdBlasHelper & getInstance()
1247 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
1250 bool isAvailable() const
1252 return g_isAmdBlasAvailable;
1259 clAmdBlasTeardown();
1267 if (!g_isAmdBlasInitialized)
1269 AutoLock lock(getInitializationMutex());
1271 if (!g_isAmdBlasInitialized)
1277 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1281 g_isAmdBlasAvailable = false;
1285 g_isAmdBlasAvailable = false;
1287 g_isAmdBlasInitialized = true;
1293 static bool g_isAmdBlasInitialized;
1294 static bool g_isAmdBlasAvailable;
1297 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1298 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1302 return AmdBlasHelper::getInstance().isAvailable();
1314 #ifdef HAVE_CLAMDFFT
1319 static AmdFftHelper & getInstance()
1321 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
1324 bool isAvailable() const
1326 return g_isAmdFftAvailable;
1333 // clAmdFftTeardown();
1341 if (!g_isAmdFftInitialized)
1343 AutoLock lock(getInitializationMutex());
1345 if (!g_isAmdFftInitialized)
1351 cl_uint major, minor, patch;
1352 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1354 // it throws exception in case AmdFft binaries are not found
1355 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1356 g_isAmdFftAvailable = true;
1358 catch (const Exception &)
1360 g_isAmdFftAvailable = false;
1364 g_isAmdFftAvailable = false;
1366 g_isAmdFftInitialized = true;
1372 static clAmdFftSetupData setupData;
1373 static bool g_isAmdFftInitialized;
1374 static bool g_isAmdFftAvailable;
1377 clAmdFftSetupData AmdFftHelper::setupData;
1378 bool AmdFftHelper::g_isAmdFftAvailable = false;
1379 bool AmdFftHelper::g_isAmdFftInitialized = false;
1383 return AmdFftHelper::getInstance().isAvailable();
1397 #ifdef HAVE_OPENCL_SVM
1406 Queue::getDefault().finish();
1409 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1411 struct Platform::Impl
1417 initialized = false;
1426 //cl_uint num_entries
1428 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1434 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1436 vendor = String(buf);
1443 IMPLEMENT_REFCOUNTABLE();
1445 cl_platform_id handle;
1450 Platform::Platform()
1455 Platform::~Platform()
1461 Platform::Platform(const Platform& pl)
1468 Platform& Platform::operator = (const Platform& pl)
1470 Impl* newp = (Impl*)pl.p;
1479 void* Platform::ptr() const
1481 return p ? p->handle : 0;
1484 Platform& Platform::getDefault()
1486 CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1496 /////////////////////////////////////// Device ////////////////////////////////////////////
1498 // deviceVersion has format
1499 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1501 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1502 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1503 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1506 if (10 >= deviceVersion.length())
1508 const char *pstr = deviceVersion.c_str();
1509 if (0 != strncmp(pstr, "OpenCL ", 7))
1511 size_t ppos = deviceVersion.find('.', 7);
1512 if (String::npos == ppos)
1514 String temp = deviceVersion.substr(7, ppos - 7);
1515 major = atoi(temp.c_str());
1516 temp = deviceVersion.substr(ppos + 1);
1517 minor = atoi(temp.c_str());
1528 cl_device_id device = (cl_device_id)d;
1530 CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only
1538 void _init(cl_device_id d)
1540 handle = (cl_device_id)d;
1542 name_ = getStrProp(CL_DEVICE_NAME);
1543 version_ = getStrProp(CL_DEVICE_VERSION);
1544 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1545 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1546 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1547 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1548 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1549 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1550 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1551 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1553 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1554 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1557 while (pos < extensions_.size())
1559 size_t pos2 = extensions_.find(' ', pos);
1560 if (pos2 == String::npos)
1561 pos2 = extensions_.size();
1564 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1565 extensions_set_.insert(extensionName);
1570 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1572 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1573 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1574 vendorName_ == "AMD")
1575 vendorID_ = VENDOR_AMD;
1576 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1577 vendorID_ = VENDOR_INTEL;
1578 else if (vendorName_ == "NVIDIA Corporation")
1579 vendorID_ = VENDOR_NVIDIA;
1581 vendorID_ = UNKNOWN_VENDOR;
1583 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1584 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1586 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1587 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1588 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1589 maxWorkGroupSize_ = new_maxWorkGroupSize;
1592 if (isExtensionSupported("cl_khr_spir"))
1594 #ifndef CL_DEVICE_SPIR_VERSIONS
1595 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1597 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1598 std::cout << spir_versions << std::endl;
1606 if (!cv::__termination)
1611 CV_OCL_CHECK(clReleaseDevice(handle));
1617 template<typename _TpCL, typename _TpOut>
1618 _TpOut getProp(cl_device_info prop) const
1623 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1624 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1627 bool getBoolProp(cl_device_info prop) const
1629 cl_bool temp = CL_FALSE;
1632 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1633 sz == sizeof(temp) ? temp != 0 : false;
1636 String getStrProp(cl_device_info prop) const
1640 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1641 sz < sizeof(buf) ? String(buf) : String();
1644 bool isExtensionSupported(const std::string& extensionName) const
1646 return extensions_set_.count(extensionName) > 0;
1650 IMPLEMENT_REFCOUNTABLE();
1652 cl_device_id handle;
1656 std::string extensions_;
1657 int doubleFPConfig_;
1658 bool hostUnifiedMemory_;
1659 int maxComputeUnits_;
1660 size_t maxWorkGroupSize_;
1663 int deviceVersionMajor_;
1664 int deviceVersionMinor_;
1665 String driverVersion_;
1668 bool intelSubgroupsSupport_;
1670 std::set<std::string> extensions_set_;
1679 Device::Device(void* d)
1685 Device::Device(const Device& d)
1692 Device& Device::operator = (const Device& d)
1694 Impl* newp = (Impl*)d.p;
1709 void Device::set(void* d)
1716 CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1720 Device Device::fromHandle(void* d)
1726 void* Device::ptr() const
1728 return p ? p->handle : 0;
1731 String Device::name() const
1732 { return p ? p->name_ : String(); }
1734 String Device::extensions() const
1735 { return p ? String(p->extensions_) : String(); }
1737 bool Device::isExtensionSupported(const String& extensionName) const
1738 { return p ? p->isExtensionSupported(extensionName) : false; }
1740 String Device::version() const
1741 { return p ? p->version_ : String(); }
1743 String Device::vendorName() const
1744 { return p ? p->vendorName_ : String(); }
1746 int Device::vendorID() const
1747 { return p ? p->vendorID_ : 0; }
1749 String Device::OpenCL_C_Version() const
1750 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1752 String Device::OpenCLVersion() const
1753 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1755 int Device::deviceVersionMajor() const
1756 { return p ? p->deviceVersionMajor_ : 0; }
1758 int Device::deviceVersionMinor() const
1759 { return p ? p->deviceVersionMinor_ : 0; }
1761 String Device::driverVersion() const
1762 { return p ? p->driverVersion_ : String(); }
1764 int Device::type() const
1765 { return p ? p->type_ : 0; }
1767 int Device::addressBits() const
1768 { return p ? p->addressBits_ : 0; }
1770 bool Device::available() const
1771 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1773 bool Device::compilerAvailable() const
1774 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1776 bool Device::linkerAvailable() const
1777 #ifdef CL_VERSION_1_2
1778 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1780 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1783 int Device::doubleFPConfig() const
1784 { return p ? p->doubleFPConfig_ : 0; }
1786 int Device::singleFPConfig() const
1787 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1789 int Device::halfFPConfig() const
1790 #ifdef CL_VERSION_1_2
1791 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1793 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1796 bool Device::endianLittle() const
1797 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1799 bool Device::errorCorrectionSupport() const
1800 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1802 int Device::executionCapabilities() const
1803 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1805 size_t Device::globalMemCacheSize() const
1806 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1808 int Device::globalMemCacheType() const
1809 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1811 int Device::globalMemCacheLineSize() const
1812 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1814 size_t Device::globalMemSize() const
1815 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1817 size_t Device::localMemSize() const
1818 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1820 int Device::localMemType() const
1821 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1823 bool Device::hostUnifiedMemory() const
1824 { return p ? p->hostUnifiedMemory_ : false; }
1826 bool Device::imageSupport() const
1827 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1829 bool Device::imageFromBufferSupport() const
1831 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1834 uint Device::imagePitchAlignment() const
1836 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1837 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1843 uint Device::imageBaseAddressAlignment() const
1845 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1846 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1852 size_t Device::image2DMaxWidth() const
1853 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1855 size_t Device::image2DMaxHeight() const
1856 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1858 size_t Device::image3DMaxWidth() const
1859 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1861 size_t Device::image3DMaxHeight() const
1862 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1864 size_t Device::image3DMaxDepth() const
1865 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1867 size_t Device::imageMaxBufferSize() const
1868 #ifdef CL_VERSION_1_2
1869 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1871 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1874 size_t Device::imageMaxArraySize() const
1875 #ifdef CL_VERSION_1_2
1876 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1878 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1881 bool Device::intelSubgroupsSupport() const
1882 { return p ? p->intelSubgroupsSupport_ : false; }
1884 int Device::maxClockFrequency() const
1885 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1887 int Device::maxComputeUnits() const
1888 { return p ? p->maxComputeUnits_ : 0; }
1890 int Device::maxConstantArgs() const
1891 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1893 size_t Device::maxConstantBufferSize() const
1894 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1896 size_t Device::maxMemAllocSize() const
1897 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1899 size_t Device::maxParameterSize() const
1900 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1902 int Device::maxReadImageArgs() const
1903 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1905 int Device::maxWriteImageArgs() const
1906 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1908 int Device::maxSamplers() const
1909 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1911 size_t Device::maxWorkGroupSize() const
1912 { return p ? p->maxWorkGroupSize_ : 0; }
1914 int Device::maxWorkItemDims() const
1915 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1917 void Device::maxWorkItemSizes(size_t* sizes) const
1921 const int MAX_DIMS = 32;
1923 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1924 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1928 int Device::memBaseAddrAlign() const
1929 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1931 int Device::nativeVectorWidthChar() const
1932 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1934 int Device::nativeVectorWidthShort() const
1935 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1937 int Device::nativeVectorWidthInt() const
1938 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1940 int Device::nativeVectorWidthLong() const
1941 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1943 int Device::nativeVectorWidthFloat() const
1944 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1946 int Device::nativeVectorWidthDouble() const
1947 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1949 int Device::nativeVectorWidthHalf() const
1950 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1952 int Device::preferredVectorWidthChar() const
1953 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1955 int Device::preferredVectorWidthShort() const
1956 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1958 int Device::preferredVectorWidthInt() const
1959 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1961 int Device::preferredVectorWidthLong() const
1962 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1964 int Device::preferredVectorWidthFloat() const
1965 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1967 int Device::preferredVectorWidthDouble() const
1968 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1970 int Device::preferredVectorWidthHalf() const
1971 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1973 size_t Device::printfBufferSize() const
1974 #ifdef CL_VERSION_1_2
1975 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1977 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1981 size_t Device::profilingTimerResolution() const
1982 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1984 const Device& Device::getDefault()
1986 auto& c = OpenCLExecutionContext::getCurrent();
1989 return c.getDevice();
1992 static Device dummy;
1996 ////////////////////////////////////// Context ///////////////////////////////////////////////////
1998 template <typename Functor, typename ObjectType>
1999 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2002 cl_int err = f(obj, name, 0, NULL, &required);
2003 if (err != CL_SUCCESS)
2009 AutoBuffer<char> buf(required + 1);
2010 char* ptr = buf.data(); // cleanup is not needed
2011 err = f(obj, name, required, ptr, NULL);
2012 if (err != CL_SUCCESS)
2020 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2025 std::istringstream ss(s);
2029 std::getline(ss, item, delim);
2030 elems.push_back(item);
2034 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2036 // Sample: AMD:GPU:Tahiti
2037 // Sample: :GPU|CPU: = '' = ':' = '::'
2038 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2039 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2041 std::vector<std::string> parts;
2042 split(configurationStr, ':', parts);
2043 if (parts.size() > 3)
2045 CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2048 if (parts.size() > 2)
2049 deviceNameOrID = parts[2];
2050 if (parts.size() > 1)
2052 split(parts[1], '|', deviceTypes);
2054 if (parts.size() > 0)
2056 platform = parts[0];
2061 #if defined WINRT || defined _WIN32_WCE
2062 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2064 CV_UNUSED(configuration)
2068 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2070 std::string platform, deviceName;
2071 std::vector<std::string> deviceTypes;
2074 configuration = getenv("OPENCV_OPENCL_DEVICE");
2076 if (configuration &&
2077 (strcmp(configuration, "disabled") == 0 ||
2078 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2084 if (deviceName.length() == 1)
2085 // We limit ID range to 0..9, because we want to write:
2086 // - '2500' to mean i5-2500
2087 // - '8350' to mean AMD FX-8350
2088 // - '650' to mean GeForce 650
2089 // To extend ID range change condition to '> 0'
2092 for (size_t i = 0; i < deviceName.length(); i++)
2094 if (!isdigit(deviceName[i]))
2102 deviceID = atoi(deviceName.c_str());
2108 std::vector<cl_platform_id> platforms;
2110 cl_uint numPlatforms = 0;
2111 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2113 if (numPlatforms == 0)
2115 platforms.resize((size_t)numPlatforms);
2116 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2117 platforms.resize(numPlatforms);
2120 int selectedPlatform = -1;
2121 if (platform.length() > 0)
2123 for (size_t i = 0; i < platforms.size(); i++)
2126 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
2127 if (name.find(platform) != std::string::npos)
2129 selectedPlatform = (int)i;
2133 if (selectedPlatform == -1)
2135 CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2139 if (deviceTypes.size() == 0)
2143 deviceTypes.push_back("GPU");
2145 deviceTypes.push_back("CPU");
2148 deviceTypes.push_back("ALL");
2150 for (size_t t = 0; t < deviceTypes.size(); t++)
2153 std::string tempStrDeviceType = deviceTypes[t];
2154 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), details::char_tolower);
2156 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2157 deviceType = Device::TYPE_GPU;
2158 else if (tempStrDeviceType == "cpu")
2159 deviceType = Device::TYPE_CPU;
2160 else if (tempStrDeviceType == "accelerator")
2161 deviceType = Device::TYPE_ACCELERATOR;
2162 else if (tempStrDeviceType == "all")
2163 deviceType = Device::TYPE_ALL;
2166 CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2170 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2171 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2172 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2176 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2177 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2179 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2183 size_t base = devices.size();
2184 devices.resize(base + count);
2185 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2186 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2188 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2192 for (size_t i = (isID ? deviceID : 0);
2193 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2197 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2198 cl_bool useGPU = true;
2199 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2201 cl_bool isIGPU = CL_FALSE;
2202 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2203 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2205 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2207 // TODO check for OpenCL 1.1
2215 return NULL; // suppress messages on stderr
2217 std::ostringstream msg;
2218 msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2219 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2220 << " Device types:";
2221 for (size_t t = 0; t < deviceTypes.size(); t++)
2222 msg << ' ' << deviceTypes[t];
2224 msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2226 CV_LOG_ERROR(NULL, msg.str());
2231 #ifdef HAVE_OPENCL_SVM
2234 enum AllocatorFlags { // don't use first 16 bits
2235 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
2236 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
2237 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
2238 OPENCL_SVM_BUFFER_MASK = 3 << 16,
2239 OPENCL_SVM_BUFFER_MAP = 4 << 16
2242 static bool checkForceSVMUmatUsage()
2244 static bool initialized = false;
2245 static bool force = false;
2248 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2253 static bool checkDisableSVMUMatUsage()
2255 static bool initialized = false;
2256 static bool force = false;
2259 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2264 static bool checkDisableSVM()
2266 static bool initialized = false;
2267 static bool force = false;
2270 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2275 // see SVMCapabilities
2276 static unsigned int getSVMCapabilitiesMask()
2278 static bool initialized = false;
2279 static unsigned int mask = 0;
2282 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2283 if (envValue == NULL)
2285 return ~0U; // all bits 1
2287 mask = atoi(envValue);
2295 static size_t getProgramCountLimit()
2297 static bool initialized = false;
2298 static size_t count = 0;
2301 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
2307 static int g_contextId = 0;
2309 class OpenCLBufferPoolImpl;
2310 class OpenCLSVMBufferPoolImpl;
2312 struct Context::Impl
2314 static Context::Impl* get(Context& context) { return context.p; }
2316 typedef std::deque<Context::Impl*> container_t;
2317 static container_t& getGlobalContainer()
2319 // never delete this container (Impl lifetime is greater due to TLS storage)
2320 static container_t* g_contexts = new container_t();
2325 Impl(const std::string& configuration_)
2327 , contextId(CV_XADD(&g_contextId, 1))
2328 , configuration(configuration_)
2330 #ifdef HAVE_OPENCL_SVM
2331 , svmInitialized(false)
2335 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2337 cv::AutoLock lock(cv::getInitializationMutex());
2338 auto& container = getGlobalContainer();
2339 container.resize(std::max(container.size(), (size_t)contextId + 1));
2340 container[contextId] = this;
2346 if (!cv::__termination)
2351 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2358 cv::AutoLock lock(cv::getInitializationMutex());
2359 auto& container = getGlobalContainer();
2360 CV_CheckLT((size_t)contextId, container.size(), "");
2361 container[contextId] = NULL;
2365 void init_device_list()
2369 cl_uint ndevices = 0;
2370 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2371 CV_Assert(ndevices > 0);
2373 cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2374 size_t devices_ret_size = 0;
2375 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2376 CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2379 for (unsigned i = 0; i < ndevices; i++)
2381 devices.emplace_back(Device::fromHandle(cl_devices[i]));
2385 void __init_buffer_pools(); // w/o synchronization
2386 void _init_buffer_pools() const
2390 cv::AutoLock lock(cv::getInitializationMutex());
2393 const_cast<Impl*>(this)->__init_buffer_pools();
2398 static Impl* findContext(const std::string& configuration)
2400 CV_TRACE_FUNCTION();
2401 cv::AutoLock lock(cv::getInitializationMutex());
2402 auto& container = getGlobalContainer();
2403 if (configuration.empty() && !container.empty())
2404 return container[0];
2405 for (auto it = container.begin(); it != container.end(); ++it)
2408 if (i && i->configuration == configuration)
2416 static Impl* findOrCreateContext(const std::string& configuration_)
2418 CV_TRACE_FUNCTION();
2419 std::string configuration = configuration_;
2420 if (configuration_.empty())
2422 const char* c = getenv("OPENCV_OPENCL_DEVICE");
2426 Impl* impl = findContext(configuration);
2429 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2433 cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
2437 impl = new Impl(configuration);
2440 impl->createFromDevice(d);
2453 static Impl* findOrCreateContext(cl_context h)
2455 CV_TRACE_FUNCTION();
2459 std::string configuration = cv::format("@ctx-%p", (void*)h);
2460 Impl* impl = findContext(configuration);
2463 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2468 impl = new Impl(configuration);
2471 CV_OCL_CHECK(clRetainContext(h));
2473 impl->init_device_list();
2483 static Impl* findOrCreateContext(const ocl::Device& device)
2485 CV_TRACE_FUNCTION();
2487 CV_Assert(!device.empty());
2488 cl_device_id d = (cl_device_id)device.ptr();
2491 std::string configuration = cv::format("@dev-%p", (void*)d);
2492 Impl* impl = findContext(configuration);
2495 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2500 impl = new Impl(configuration);
2503 impl->createFromDevice(d);
2504 CV_Assert(impl->handle);
2516 CV_TRACE_FUNCTION();
2517 cl_device_id d = selectOpenCLDevice();
2522 createFromDevice(d);
2525 void createFromDevice(cl_device_id d)
2527 CV_TRACE_FUNCTION();
2528 CV_Assert(handle == NULL);
2530 cl_platform_id pl = NULL;
2531 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2533 cl_context_properties prop[] =
2535 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2539 // !!! in the current implementation force the number of devices to 1 !!!
2543 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2544 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2546 bool ok = handle != 0 && status == CL_SUCCESS;
2556 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2558 void unloadProg(Program& prog)
2560 cv::AutoLock lock(program_cache_mutex);
2561 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2563 phash_t::iterator it = phash.find(*i);
2564 if (it != phash.end())
2566 if (it->second.ptr() == prog.ptr())
2576 std::string& getPrefixString()
2580 cv::AutoLock lock(program_cache_mutex);
2583 CV_Assert(!devices.empty());
2584 const Device& d = devices[0];
2585 int bits = d.addressBits();
2586 if (bits > 0 && bits != 64)
2587 prefix = cv::format("%d-bit--", bits);
2588 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2590 for (size_t i = 0; i < prefix.size(); i++)
2593 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2603 std::string& getPrefixBase()
2605 if (prefix_base.empty())
2607 cv::AutoLock lock(program_cache_mutex);
2608 if (prefix_base.empty())
2610 const Device& d = devices[0];
2611 int bits = d.addressBits();
2612 if (bits > 0 && bits != 64)
2613 prefix_base = cv::format("%d-bit--", bits);
2614 prefix_base += d.vendorName() + "--" + d.name() + "--";
2616 for (size_t i = 0; i < prefix_base.size(); i++)
2618 char c = prefix_base[i];
2619 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2621 prefix_base[i] = '_';
2629 IMPLEMENT_REFCOUNTABLE();
2631 const int contextId; // global unique ID
2632 const std::string configuration;
2635 std::vector<Device> devices;
2638 std::string prefix_base;
2640 cv::Mutex program_cache_mutex;
2641 typedef std::map<std::string, Program> phash_t;
2643 typedef std::list<cv::String> CacheList;
2644 CacheList cacheList;
2646 std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
2647 std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
2648 OpenCLBufferPoolImpl& getBufferPool() const
2650 _init_buffer_pools();
2651 CV_DbgAssert(bufferPool_);
2652 return *bufferPool_.get();
2654 OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2656 _init_buffer_pools();
2657 CV_DbgAssert(bufferPoolHostPtr_);
2658 return *bufferPoolHostPtr_.get();
2661 #ifdef HAVE_OPENCL_SVM
2662 bool svmInitialized;
2665 svm::SVMCapabilities svmCapabilities;
2666 svm::SVMFunctions svmFunctions;
2670 CV_Assert(handle != NULL);
2671 const Device& device = devices[0];
2672 cl_device_svm_capabilities deviceCaps = 0;
2673 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2674 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2675 if (status != CL_SUCCESS)
2677 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2680 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2681 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2682 svmCapabilities.value_ =
2683 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2684 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2685 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2686 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2687 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2688 if (svmCapabilities.value_ == 0)
2690 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2696 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2697 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2700 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2701 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2706 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2707 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2709 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2710 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2715 ((int*)ptr)[0] = 100;
2719 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2722 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2724 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2725 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2730 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2735 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2736 clSVMFree(handle, ptr);
2739 clSVMFree(handle, ptr);
2740 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2741 svmFunctions.fn_clSVMFree = clSVMFree;
2742 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2743 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2744 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2745 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2746 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2747 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2748 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2752 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2755 // Try HSA extension
2756 String extensions = device.extensions();
2757 if (extensions.find("cl_amd_svm") == String::npos)
2759 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2762 cl_platform_id p = NULL;
2763 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2764 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2765 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2766 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2767 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2768 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2769 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2770 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2771 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2772 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2773 CV_Assert(svmFunctions.isValid());
2777 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2782 svmAvailable = true;
2783 svmEnabled = !svm::checkDisableSVM();
2784 svmInitialized = true;
2785 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2788 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2789 svmAvailable = false;
2791 svmCapabilities.value_ = 0;
2792 svmInitialized = true;
2793 svmFunctions.fn_clSVMAlloc = NULL;
2797 std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2799 OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2801 _init_buffer_pools();
2802 CV_DbgAssert(bufferPoolSVM_);
2803 return *bufferPoolSVM_.get();
2807 friend class Program;
2822 Context::Context(int dtype)
2828 void Context::release()
2837 bool Context::create()
2842 p = Impl::findOrCreateContext(std::string());
2850 bool Context::create(int dtype)
2855 if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2857 p = Impl::findOrCreateContext("");
2859 else if (dtype == CL_DEVICE_TYPE_GPU)
2861 p = Impl::findOrCreateContext(":GPU:");
2863 else if (dtype == CL_DEVICE_TYPE_CPU)
2865 p = Impl::findOrCreateContext(":CPU:");
2869 CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2871 if (p && !p->handle)
2878 Context::Context(const Context& c)
2885 Context& Context::operator = (const Context& c)
2887 Impl* newp = (Impl*)c.p;
2896 void* Context::ptr() const
2898 return p == NULL ? NULL : p->handle;
2901 size_t Context::ndevices() const
2903 return p ? p->devices.size() : 0;
2906 Device& Context::device(size_t idx) const
2908 static Device dummy;
2909 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2912 Context& Context::getDefault(bool initialize)
2914 auto& c = OpenCLExecutionContext::getCurrent();
2917 auto& ctx = c.getContext();
2921 CV_UNUSED(initialize);
2922 static Context dummy;
2926 Program Context::getProg(const ProgramSource& prog,
2927 const String& buildopts, String& errmsg)
2929 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2932 void Context::unloadProg(Program& prog)
2935 p->unloadProg(prog);
2939 Context Context::fromHandle(void* context)
2942 ctx.p = Impl::findOrCreateContext((cl_context)context);
2947 Context Context::fromDevice(const ocl::Device& device)
2950 ctx.p = Impl::findOrCreateContext(device);
2955 Context Context::create(const std::string& configuration)
2958 ctx.p = Impl::findOrCreateContext(configuration);
2962 #ifdef HAVE_OPENCL_SVM
2963 bool Context::useSVM() const
2965 Context::Impl* i = p;
2967 if (!i->svmInitialized)
2969 return i->svmEnabled;
2971 void Context::setUseSVM(bool enabled)
2973 Context::Impl* i = p;
2975 if (!i->svmInitialized)
2977 if (enabled && !i->svmAvailable)
2979 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
2981 i->svmEnabled = enabled;
2984 bool Context::useSVM() const { return false; }
2985 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
2988 #ifdef HAVE_OPENCL_SVM
2991 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
2993 Context::Impl* i = context.p;
2995 if (!i->svmInitialized)
2997 return i->svmCapabilities;
3000 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
3002 Context::Impl* i = context.p;
3004 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
3005 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
3006 return &i->svmFunctions;
3009 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3011 if (checkForceSVMUmatUsage())
3013 if (checkDisableSVMUMatUsage())
3015 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3017 return false; // don't use SVM by default
3020 } // namespace cv::ocl::svm
3021 #endif // HAVE_OPENCL_SVM
3024 static void get_platform_name(cl_platform_id id, String& name)
3026 // get platform name string length
3028 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
3030 // get platform name string
3031 AutoBuffer<char> buf(sz + 1);
3032 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
3034 // just in case, ensure trailing zero for ASCIIZ string
3041 // Attaches OpenCL context to OpenCV
3043 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3045 auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3050 OpenCLExecutionContext OpenCLExecutionContext::create(
3051 const std::string& platformName, void* platformID, void* context, void* deviceID
3055 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3058 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3061 CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3063 std::vector<cl_platform_id> platforms(cnt);
3065 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3067 bool platformAvailable = false;
3069 // check if external platformName contained in list of available platforms in OpenCV
3070 for (unsigned int i = 0; i < cnt; i++)
3072 String availablePlatformName;
3073 get_platform_name(platforms[i], availablePlatformName);
3074 // external platform is found in the list of available platforms
3075 if (platformName == availablePlatformName)
3077 platformAvailable = true;
3082 if (!platformAvailable)
3083 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3085 // check if platformID corresponds to platformName
3086 String actualPlatformName;
3087 get_platform_name((cl_platform_id)platformID, actualPlatformName);
3088 if (platformName != actualPlatformName)
3089 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3091 OpenCLExecutionContext ctx;
3092 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
3093 CV_OCL_CHECK(clReleaseContext((cl_context)context));
3094 CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3098 void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3100 // internal call, less checks
3101 cl_platform_id platformID = (cl_platform_id)_platform;
3102 cl_context context = (cl_context)_context;
3103 cl_device_id deviceID = (cl_device_id)_device;
3105 std::string platformName = PlatformInfo(&platformID).name();
3107 auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3108 CV_Assert(!clExecCtx.empty());
3109 ctx = clExecCtx.getContext();
3112 /////////////////////////////////////////// Queue /////////////////////////////////////////////
3116 inline void __init()
3120 isProfilingQueue_ = false;
3123 Impl(cl_command_queue q)
3128 cl_command_queue_properties props = 0;
3129 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3130 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3133 Impl(cl_command_queue q, bool isProfilingQueue)
3137 isProfilingQueue_ = isProfilingQueue;
3140 Impl(const Context& c, const Device& d, bool withProfiling = false)
3144 const Context* pc = &c;
3145 cl_context ch = (cl_context)pc->ptr();
3148 pc = &Context::getDefault();
3149 ch = (cl_context)pc->ptr();
3151 cl_device_id dh = (cl_device_id)d.ptr();
3153 dh = (cl_device_id)pc->device(0).ptr();
3155 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3156 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3157 isProfilingQueue_ = withProfiling;
3163 if (!cv::__termination)
3168 CV_OCL_DBG_CHECK(clFinish(handle));
3169 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3175 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3177 if (isProfilingQueue_)
3180 if (profiling_queue_.ptr())
3181 return profiling_queue_;
3184 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3186 cl_device_id device = 0;
3187 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3189 cl_int result = CL_SUCCESS;
3190 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3191 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3192 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3195 queue.p = new Impl(q, true);
3196 profiling_queue_ = queue;
3198 return profiling_queue_;
3201 IMPLEMENT_REFCOUNTABLE();
3203 cl_command_queue handle;
3204 bool isProfilingQueue_;
3205 cv::ocl::Queue profiling_queue_;
3213 Queue::Queue(const Context& c, const Device& d)
3219 Queue::Queue(const Queue& q)
3226 Queue& Queue::operator = (const Queue& q)
3228 Impl* newp = (Impl*)q.p;
3243 bool Queue::create(const Context& c, const Device& d)
3248 return p->handle != 0;
3251 void Queue::finish()
3255 CV_OCL_DBG_CHECK(clFinish(p->handle));
3259 const Queue& Queue::getProfilingQueue() const
3262 return p->getProfilingQueue(*this);
3265 void* Queue::ptr() const
3267 return p ? p->handle : 0;
3270 Queue& Queue::getDefault()
3272 auto& c = OpenCLExecutionContext::getCurrent();
3275 auto& q = c.getQueue();
3282 static cl_command_queue getQueue(const Queue& q)
3284 cl_command_queue qq = (cl_command_queue)q.ptr();
3286 qq = (cl_command_queue)Queue::getDefault().ptr();
3290 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3292 KernelArg::KernelArg()
3293 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3297 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3298 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3300 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3303 KernelArg KernelArg::Constant(const Mat& m)
3305 CV_Assert(m.isContinuous());
3306 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3309 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
3313 Impl(const char* kname, const Program& prog) :
3314 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
3316 cl_program ph = (cl_program)prog.ptr();
3321 handle = clCreateKernel(ph, kname, &retval);
3322 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3324 for( int i = 0; i < MAX_ARRS; i++ )
3326 haveTempDstUMats = false;
3327 haveTempSrcUMats = false;
3332 for( int i = 0; i < MAX_ARRS; i++ )
3335 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3337 u[i]->flags |= UMatData::ASYNC_CLEANUP;
3338 u[i]->currAllocator->deallocate(u[i]);
3343 haveTempDstUMats = false;
3344 haveTempSrcUMats = false;
3347 void addUMat(const UMat& m, bool dst)
3349 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3351 CV_XADD(&m.u->urefcount, 1);
3353 if(dst && m.u->tempUMat())
3354 haveTempDstUMats = true;
3355 if(m.u->originalUMatData == NULL && m.u->tempUMat())
3356 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3359 void addImage(const Image2D& image)
3361 images.push_back(image);
3364 void finit(cl_event e)
3369 isInProgress = false;
3373 bool run(int dims, size_t _globalsize[], size_t _localsize[],
3374 bool sync, int64* timeNS, const Queue& q);
3380 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3384 IMPLEMENT_REFCOUNTABLE();
3388 enum { MAX_ARRS = 16 };
3389 UMatData* u[MAX_ARRS];
3391 bool isAsyncRun; // true if kernel was scheduled in async mode
3393 std::list<Image2D> images;
3394 bool haveTempDstUMats;
3395 bool haveTempSrcUMats;
3398 }} // namespace cv::ocl
3402 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3406 ((cv::ocl::Kernel::Impl*)p)->finit(e);
3408 catch (const cv::Exception& exc)
3410 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3412 catch (const std::exception& exc)
3414 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3418 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3424 namespace cv { namespace ocl {
3431 Kernel::Kernel(const char* kname, const Program& prog)
3434 create(kname, prog);
3437 Kernel::Kernel(const char* kname, const ProgramSource& src,
3438 const String& buildopts, String* errmsg)
3441 create(kname, src, buildopts, errmsg);
3444 Kernel::Kernel(const Kernel& k)
3451 Kernel& Kernel::operator = (const Kernel& k)
3453 Impl* newp = (Impl*)k.p;
3468 bool Kernel::create(const char* kname, const Program& prog)
3472 p = new Impl(kname, prog);
3478 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3484 bool Kernel::create(const char* kname, const ProgramSource& src,
3485 const String& buildopts, String* errmsg)
3493 if( !errmsg ) errmsg = &tempmsg;
3494 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
3495 return create(kname, prog);
3498 void* Kernel::ptr() const
3500 return p ? p->handle : 0;
3503 bool Kernel::empty() const
3508 int Kernel::set(int i, const void* value, size_t sz)
3510 if (!p || !p->handle)
3517 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3518 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str());
3519 if (retval != CL_SUCCESS)
3524 int Kernel::set(int i, const Image2D& image2D)
3526 p->addImage(image2D);
3527 cl_mem h = (cl_mem)image2D.ptr();
3528 return set(i, &h, sizeof(h));
3531 int Kernel::set(int i, const UMat& m)
3533 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3536 int Kernel::set(int i, const KernelArg& arg)
3538 if( !p || !p->handle )
3542 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
3543 p->name.c_str(), (int)i));
3551 AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
3552 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
3553 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3554 if (ptronly && arg.m->empty())
3556 cl_mem h_null = (cl_mem)NULL;
3557 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3558 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3561 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3565 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3566 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3572 #ifdef HAVE_OPENCL_SVM
3573 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3575 const Context& ctx = Context::getDefault();
3576 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3577 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3578 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3580 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3582 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3584 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3589 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3590 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3597 else if( arg.m->dims <= 2 )
3600 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3601 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3602 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3603 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3606 if( !(arg.flags & KernelArg::NO_SIZE) )
3608 int cols = u2d.cols*arg.wscale/arg.iwscale;
3609 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3610 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3611 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3612 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3619 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3620 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3621 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3622 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3623 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3624 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3626 if( !(arg.flags & KernelArg::NO_SIZE) )
3628 int cols = u3d.cols*arg.wscale/arg.iwscale;
3629 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3630 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3631 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3632 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3633 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3634 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3638 p->addUMat(*arg.m, !!(accessFlags & ACCESS_WRITE));
3641 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3642 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3646 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3647 bool sync, const Queue& q)
3652 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3654 CV_Assert(_globalsize != NULL);
3655 for (int i = 0; i < dims; i++)
3657 size_t val = _localsize ? _localsize[i] :
3658 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3659 CV_Assert( val > 0 );
3660 total *= _globalsize[i];
3661 if (_globalsize[i] == 1 && !_localsize)
3663 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3665 CV_Assert(total > 0);
3667 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3671 static bool isRaiseErrorOnReuseAsyncKernel()
3673 static bool initialized = false;
3674 static bool value = false;
3677 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3683 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3684 bool sync, int64* timeNS, const Queue& q)
3686 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3690 CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3696 CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3697 if (isRaiseErrorOnReuseAsyncKernel())
3699 return false; // OpenCV 5.0: raise error
3705 CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3706 if (isRaiseErrorOnReuseAsyncKernel())
3708 return false; // OpenCV 5.0: raise error
3711 cl_command_queue qq = getQueue(q);
3712 if (haveTempDstUMats)
3714 if (haveTempSrcUMats)
3718 cl_event asyncEvent = 0;
3719 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3720 NULL, globalsize, localsize, 0, 0,
3721 (sync && !timeNS) ? 0 : &asyncEvent);
3722 #if !CV_OPENCL_SHOW_RUN_KERNELS
3723 if (retval != CL_SUCCESS)
3726 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
3727 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3728 (localsize ? cv::format("%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3729 sync ? "true" : "false"
3731 if (retval != CL_SUCCESS)
3733 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3735 #if CV_OPENCL_TRACE_CHECK
3736 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3738 printf("%s\n", msg.c_str());
3742 if (sync || retval != CL_SUCCESS)
3744 CV_OCL_DBG_CHECK(clFinish(qq));
3747 if (retval == CL_SUCCESS)
3749 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3750 cl_ulong startTime, stopTime;
3751 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3752 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3753 *timeNS = (int64)(stopTime - startTime);
3765 isInProgress = true;
3766 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3769 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3770 return retval == CL_SUCCESS;
3773 bool Kernel::runTask(bool sync, const Queue& q)
3775 if(!p || !p->handle || p->isInProgress)
3778 cl_command_queue qq = getQueue(q);
3779 cl_event asyncEvent = 0;
3780 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3781 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3782 if (sync || retval != CL_SUCCESS)
3784 CV_OCL_DBG_CHECK(clFinish(qq));
3790 p->isInProgress = true;
3791 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3794 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3795 return retval == CL_SUCCESS;
3798 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3800 CV_Assert(p && p->handle && !p->isInProgress);
3801 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3803 q.finish(); // call clFinish() on base queue
3804 Queue profilingQueue = q.getProfilingQueue();
3806 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3807 return res ? timeNs : -1;
3810 size_t Kernel::workGroupSize() const
3812 if(!p || !p->handle)
3814 size_t val = 0, retsz = 0;
3815 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3816 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3817 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3818 return status == CL_SUCCESS ? val : 0;
3821 size_t Kernel::preferedWorkGroupSizeMultiple() const
3823 if(!p || !p->handle)
3825 size_t val = 0, retsz = 0;
3826 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3827 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3828 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3829 return status == CL_SUCCESS ? val : 0;
3832 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3834 if(!p || !p->handle || !wsz)
3837 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3838 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3839 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3840 return status == CL_SUCCESS;
3843 size_t Kernel::localMemSize() const
3845 if(!p || !p->handle)
3849 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3850 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3851 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3852 return status == CL_SUCCESS ? (size_t)val : 0;
3857 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3859 struct ProgramSource::Impl
3861 IMPLEMENT_REFCOUNTABLE();
3864 PROGRAM_SOURCE_CODE = 0,
3870 Impl(const String& src)
3872 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3873 initFromSource(src, cv::String());
3875 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3877 init(PROGRAM_SOURCE_CODE, module, name);
3878 initFromSource(codeStr, codeHash);
3882 void init(enum KIND kind, const String& module, const String& name)
3891 isHashUpdated = false;
3894 void initFromSource(const String& codeStr, const String& codeHash)
3897 sourceHash_ = codeHash;
3898 if (sourceHash_.empty())
3904 isHashUpdated = true;
3908 void updateHash(const char* hashStr = NULL)
3912 sourceHash_ = cv::String(hashStr);
3913 isHashUpdated = true;
3919 case PROGRAM_SOURCE_CODE:
3922 CV_Assert(codeStr_.empty());
3923 hash = crc64(sourceAddr_, sourceSize_); // static storage
3927 CV_Assert(!codeStr_.empty());
3928 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3931 case PROGRAM_BINARIES:
3934 hash = crc64(sourceAddr_, sourceSize_);
3937 CV_Error(Error::StsInternal, "Internal error");
3939 sourceHash_ = cv::format("%08jx", (uintmax_t)hash);
3940 isHashUpdated = true;
3943 Impl(enum KIND kind,
3944 const String& module, const String& name,
3945 const unsigned char* binary, const size_t size,
3946 const cv::String& buildOptions = cv::String())
3948 init(kind, module, name);
3950 sourceAddr_ = binary;
3953 buildOptions_ = buildOptions;
3956 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3957 const char* sourceCodeStaticStr, const char* hashStaticStr,
3958 const cv::String& buildOptions)
3960 ProgramSource result;
3961 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3962 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3963 result.p->updateHash(hashStaticStr);
3967 static ProgramSource fromBinary(const String& module, const String& name,
3968 const unsigned char* binary, const size_t size,
3969 const cv::String& buildOptions)
3971 ProgramSource result;
3972 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
3976 static ProgramSource fromSPIR(const String& module, const String& name,
3977 const unsigned char* binary, const size_t size,
3978 const cv::String& buildOptions)
3980 ProgramSource result;
3981 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
3988 // TODO std::vector<ProgramSource> includes_;
3989 String codeStr_; // PROGRAM_SOURCE_CODE only
3991 const unsigned char* sourceAddr_;
3994 cv::String buildOptions_;
3999 friend struct Program::Impl;
4000 friend struct internal::ProgramEntry;
4001 friend struct Context::Impl;
4005 ProgramSource::ProgramSource()
4010 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
4012 p = new Impl(module, name, codeStr, codeHash);
4015 ProgramSource::ProgramSource(const char* prog)
4020 ProgramSource::ProgramSource(const String& prog)
4025 ProgramSource::~ProgramSource()
4031 ProgramSource::ProgramSource(const ProgramSource& prog)
4038 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
4040 Impl* newp = (Impl*)prog.p;
4049 const String& ProgramSource::source() const
4052 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
4053 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
4057 ProgramSource::hash_t ProgramSource::hash() const
4059 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
4062 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
4063 const unsigned char* binary, const size_t size,
4064 const cv::String& buildOptions)
4067 CV_Assert(size > 0);
4068 return Impl::fromBinary(module, name, binary, size, buildOptions);
4071 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4072 const unsigned char* binary, const size_t size,
4073 const cv::String& buildOptions)
4076 CV_Assert(size > 0);
4077 return Impl::fromBinary(module, name, binary, size, buildOptions);
4081 internal::ProgramEntry::operator ProgramSource&() const
4083 if (this->pProgramSource == NULL)
4085 cv::AutoLock lock(cv::getInitializationMutex());
4086 if (this->pProgramSource == NULL)
4088 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
4089 ProgramSource* ptr = new ProgramSource(ps);
4090 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
4093 return *this->pProgramSource;
4098 /////////////////////////////////////////// Program /////////////////////////////////////////////
4101 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4109 return a + (cv::String(" ") + b);
4112 struct Program::Impl
4114 IMPLEMENT_REFCOUNTABLE();
4116 Impl(const ProgramSource& src,
4117 const String& _buildflags, String& errmsg) :
4120 buildflags(_buildflags)
4122 const ProgramSource::Impl* src_ = src.getImpl();
4124 sourceModule_ = src_->module_;
4125 sourceName_ = src_->name_;
4126 const Context ctx = Context::getDefault();
4127 Device device = ctx.device(0);
4128 if (ctx.ptr() == NULL || device.ptr() == NULL)
4130 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
4131 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4134 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
4135 else if (device.isIntel())
4136 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
4137 const String param_buildExtraOptions = getBuildExtraOptions();
4138 if (!param_buildExtraOptions.empty())
4139 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
4141 compile(ctx, src_, errmsg);
4144 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4146 CV_Assert(ctx.getImpl());
4149 // We don't cache OpenCL binaries
4150 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4152 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4153 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4156 return compileWithCache(ctx, src_, errmsg);
4159 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4161 CV_Assert(ctx.getImpl());
4163 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
4165 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4166 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
4167 const std::string base_dir = config.prepareCacheDirectoryForContext(
4168 ctx.getImpl()->getPrefixString(),
4169 ctx.getImpl()->getPrefixBase()
4171 const String& hash_str = src_->sourceHash_;
4173 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
4175 CV_Assert(!hash_str.empty());
4176 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
4177 fname = utils::fs::join(base_dir, fname);
4179 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
4180 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
4184 std::vector<char> binaryBuf;
4187 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4188 BinaryProgramFile file(fname, hash_str.c_str());
4189 res = file.read(buildflags, binaryBuf);
4193 CV_Assert(!binaryBuf.empty());
4194 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
4195 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
4200 catch (const cv::Exception& e)
4203 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
4207 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
4210 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4211 CV_Assert(handle == NULL);
4212 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4214 if (!buildFromSources(ctx, src_, errmsg))
4219 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
4221 buildflags = joinBuildOptions(buildflags, " -x spir");
4222 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
4224 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
4226 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4227 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4231 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
4233 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
4237 CV_Error(Error::StsInternal, "Internal error");
4239 CV_Assert(handle != NULL);
4240 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4241 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
4245 std::vector<char> binaryBuf;
4246 getProgramBinary(binaryBuf);
4248 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4249 BinaryProgramFile file(fname, hash_str.c_str());
4250 file.write(buildflags, binaryBuf);
4253 catch (const cv::Exception& e)
4255 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
4259 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
4262 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4263 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4264 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4266 std::vector<char> binaryBuf;
4267 getProgramBinary(binaryBuf);
4268 if (!binaryBuf.empty())
4270 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4272 createFromBinary(ctx, binaryBuf, errmsg);
4276 return handle != NULL;
4279 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4281 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4284 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4285 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4286 if (log_retval == CL_SUCCESS && retsz > 1)
4288 buffer.resize(retsz + 16);
4289 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4290 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4291 if (log_retval == CL_SUCCESS)
4293 if (retsz < buffer.size())
4296 buffer[buffer.size() - 1] = 0;
4304 errmsg = String(buffer.data());
4305 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4306 sourceModule_.c_str(), sourceName_.c_str(),
4307 result, getOpenCLErrorString(result),
4308 buildflags.c_str(), errmsg.c_str());
4312 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4315 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
4316 CV_Assert(handle == NULL);
4317 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
4318 sourceModule_.c_str(), sourceName_.c_str(),
4319 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
4321 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
4323 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
4324 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
4325 CV_Assert(srcptr != NULL);
4326 CV_Assert(srclen > 0);
4330 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
4331 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
4332 CV_Assert(handle || retval != CL_SUCCESS);
4333 if (handle && retval == CL_SUCCESS)
4335 size_t n = ctx.ndevices();
4336 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
4337 cl_device_id* deviceList = deviceListBuf.data();
4338 for (size_t i = 0; i < n; i++)
4340 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
4343 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
4344 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
4345 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
4346 if (retval != CL_SUCCESS)
4349 dumpBuildLog_(retval, deviceList, errmsg);
4351 // don't remove "retval != CL_SUCCESS" condition here:
4352 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
4353 if (retval != CL_SUCCESS && handle)
4355 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4359 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4360 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4362 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
4364 char kernels_buffer[4096] = {0};
4365 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4366 if (retsz < sizeof(kernels_buffer))
4367 kernels_buffer[retsz] = 0;
4369 kernels_buffer[0] = 0;
4370 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4375 return handle != NULL;
4378 void getProgramBinary(std::vector<char>& buf)
4382 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4384 uchar* ptr = (uchar*)&buf[0];
4385 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4388 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4390 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
4393 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4395 CV_Assert(handle == NULL);
4396 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4397 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4399 CV_Assert(binarySize > 0);
4401 size_t ndevices = (int)ctx.ndevices();
4402 AutoBuffer<cl_device_id> devices_(ndevices);
4403 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4404 AutoBuffer<size_t> binarySizes_(ndevices);
4406 cl_device_id* devices = devices_.data();
4407 const uchar** binaryPtrs = binaryPtrs_.data();
4408 size_t* binarySizes = binarySizes_.data();
4409 for (size_t i = 0; i < ndevices; i++)
4411 devices[i] = (cl_device_id)ctx.device(i).ptr();
4412 binaryPtrs[i] = binaryAddr;
4413 binarySizes[i] = binarySize;
4417 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4418 binarySizes, binaryPtrs, NULL, &result);
4419 if (result != CL_SUCCESS)
4421 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4424 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4432 // call clBuildProgram()
4434 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4435 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4436 if (result != CL_SUCCESS)
4438 dumpBuildLog_(result, devices, errmsg);
4441 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4447 // check build status
4449 cl_build_status build_status = CL_BUILD_NONE;
4451 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4452 sizeof(build_status), &build_status, &retsz));
4453 if (result == CL_SUCCESS)
4455 if (build_status == CL_BUILD_SUCCESS)
4461 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4467 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4470 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4475 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4476 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4478 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4480 char kernels_buffer[4096] = {0};
4481 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4482 if (retsz < sizeof(kernels_buffer))
4483 kernels_buffer[retsz] = 0;
4485 kernels_buffer[0] = 0;
4486 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4489 return handle != NULL;
4497 if (!cv::__termination)
4500 clReleaseProgram(handle);
4509 String sourceModule_;
4514 Program::Program() { p = 0; }
4516 Program::Program(const ProgramSource& src,
4517 const String& buildflags, String& errmsg)
4520 create(src, buildflags, errmsg);
4523 Program::Program(const Program& prog)
4530 Program& Program::operator = (const Program& prog)
4532 Impl* newp = (Impl*)prog.p;
4547 bool Program::create(const ProgramSource& src,
4548 const String& buildflags, String& errmsg)
4555 p = new Impl(src, buildflags, errmsg);
4564 void* Program::ptr() const
4566 return p ? p->handle : 0;
4569 #ifndef OPENCV_REMOVE_DEPRECATED_API
4570 const ProgramSource& Program::source() const
4572 CV_Error(Error::StsNotImplemented, "Removed API");
4575 bool Program::read(const String& bin, const String& buildflags)
4577 CV_UNUSED(bin); CV_UNUSED(buildflags);
4578 CV_Error(Error::StsNotImplemented, "Removed API");
4581 bool Program::write(String& bin) const
4584 CV_Error(Error::StsNotImplemented, "Removed API");
4587 String Program::getPrefix() const
4591 Context::Impl* ctx_ = Context::getDefault().getImpl();
4593 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4596 String Program::getPrefix(const String& buildflags)
4598 Context::Impl* ctx_ = Context::getDefault().getImpl();
4600 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4602 #endif // OPENCV_REMOVE_DEPRECATED_API
4604 void Program::getBinary(std::vector<char>& binary) const
4606 CV_Assert(p && "Empty program");
4607 p->getProgramBinary(binary);
4610 Program Context::Impl::getProg(const ProgramSource& src,
4611 const String& buildflags, String& errmsg)
4613 size_t limit = getProgramCountLimit();
4614 const ProgramSource::Impl* src_ = src.getImpl();
4616 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4617 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4618 getPrefixString().c_str(),
4619 buildflags.c_str());
4621 cv::AutoLock lock(program_cache_mutex);
4622 phash_t::iterator it = phash.find(key);
4623 if (it != phash.end())
4626 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4627 if (i != cacheList.end() && i != cacheList.begin())
4630 cacheList.push_front(key);
4634 { // cleanup program cache
4635 size_t sz = phash.size();
4636 if (limit > 0 && sz >= limit)
4638 static bool warningFlag = false;
4641 printf("\nWARNING: OpenCV-OpenCL:\n"
4642 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4643 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4646 while (!cacheList.empty())
4648 size_t c = phash.erase(cacheList.back());
4649 cacheList.pop_back();
4656 Program prog(src, buildflags, errmsg);
4657 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4659 cv::AutoLock lock(program_cache_mutex);
4660 phash.insert(std::pair<std::string, Program>(key, prog));
4661 cacheList.push_front(key);
4667 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4669 template<typename T>
4670 class OpenCLBufferPool
4673 ~OpenCLBufferPool() { }
4675 virtual T allocate(size_t size) = 0;
4676 virtual void release(T buffer) = 0;
4679 template <typename Derived, typename BufferEntry, typename T>
4680 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4683 inline Derived& derived() { return *static_cast<Derived*>(this); }
4687 size_t currentReservedSize;
4688 size_t maxReservedSize;
4690 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4691 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4694 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4696 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4697 for (; i != allocatedEntries_.end(); ++i)
4699 BufferEntry& e = *i;
4700 if (e.clBuffer_ == buffer)
4703 allocatedEntries_.erase(i);
4711 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4713 if (reservedEntries_.empty())
4715 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4716 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4718 size_t minDiff = (size_t)(-1);
4719 for (; i != reservedEntries_.end(); ++i)
4721 BufferEntry& e = *i;
4722 if (e.capacity_ >= size)
4724 size_t diff = e.capacity_ - size;
4725 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4735 if (result_pos != reservedEntries_.end())
4737 //CV_DbgAssert(result == *result_pos);
4738 reservedEntries_.erase(result_pos);
4740 currentReservedSize -= entry.capacity_;
4741 allocatedEntries_.push_back(entry);
4748 void _checkSizeOfReservedEntries()
4750 while (currentReservedSize > maxReservedSize)
4752 CV_DbgAssert(!reservedEntries_.empty());
4753 const BufferEntry& entry = reservedEntries_.back();
4754 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4755 currentReservedSize -= entry.capacity_;
4756 derived()._releaseBufferEntry(entry);
4757 reservedEntries_.pop_back();
4761 inline size_t _allocationGranularity(size_t size)
4764 if (size < 1024*1024)
4765 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4766 else if (size < 16*1024*1024)
4773 OpenCLBufferPoolBaseImpl()
4774 : currentReservedSize(0),
4779 virtual ~OpenCLBufferPoolBaseImpl()
4781 freeAllReservedBuffers();
4782 CV_Assert(reservedEntries_.empty());
4785 virtual T allocate(size_t size) CV_OVERRIDE
4787 AutoLock locker(mutex_);
4789 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4791 CV_DbgAssert(size <= entry.capacity_);
4792 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4796 derived()._allocateBufferEntry(entry, size);
4798 return entry.clBuffer_;
4800 virtual void release(T buffer) CV_OVERRIDE
4802 AutoLock locker(mutex_);
4804 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4805 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4807 derived()._releaseBufferEntry(entry);
4811 reservedEntries_.push_front(entry);
4812 currentReservedSize += entry.capacity_;
4813 _checkSizeOfReservedEntries();
4817 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4818 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4819 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4821 AutoLock locker(mutex_);
4822 size_t oldMaxReservedSize = maxReservedSize;
4823 maxReservedSize = size;
4824 if (maxReservedSize < oldMaxReservedSize)
4826 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4827 for (; i != reservedEntries_.end();)
4829 const BufferEntry& entry = *i;
4830 if (entry.capacity_ > maxReservedSize / 8)
4832 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4833 currentReservedSize -= entry.capacity_;
4834 derived()._releaseBufferEntry(entry);
4835 i = reservedEntries_.erase(i);
4840 _checkSizeOfReservedEntries();
4843 virtual void freeAllReservedBuffers() CV_OVERRIDE
4845 AutoLock locker(mutex_);
4846 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4847 for (; i != reservedEntries_.end(); ++i)
4849 const BufferEntry& entry = *i;
4850 derived()._releaseBufferEntry(entry);
4852 reservedEntries_.clear();
4853 currentReservedSize = 0;
4857 struct CLBufferEntry
4861 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4864 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4867 typedef struct CLBufferEntry BufferEntry;
4871 OpenCLBufferPoolImpl(int createFlags = 0)
4872 : createFlags_(createFlags)
4876 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4878 CV_DbgAssert(entry.clBuffer_ == NULL);
4879 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4880 Context& ctx = Context::getDefault();
4881 cl_int retval = CL_SUCCESS;
4882 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4883 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4884 CV_Assert(entry.clBuffer_ != NULL);
4885 if(retval == CL_SUCCESS)
4887 CV_IMPL_ADD(CV_IMPL_OCL);
4889 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4890 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4891 allocatedEntries_.push_back(entry);
4894 void _releaseBufferEntry(const BufferEntry& entry)
4896 CV_Assert(entry.capacity_ != 0);
4897 CV_Assert(entry.clBuffer_ != NULL);
4898 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4899 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4900 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4904 #ifdef HAVE_OPENCL_SVM
4905 struct CLSVMBufferEntry
4909 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4911 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4914 typedef struct CLSVMBufferEntry BufferEntry;
4916 OpenCLSVMBufferPoolImpl()
4920 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4922 CV_DbgAssert(entry.clBuffer_ == NULL);
4923 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4925 Context& ctx = Context::getDefault();
4926 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4927 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4928 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4929 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4931 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4932 CV_DbgAssert(svmFns->isValid());
4934 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4935 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4938 entry.clBuffer_ = buf;
4940 CV_IMPL_ADD(CV_IMPL_OCL);
4942 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4943 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4944 allocatedEntries_.push_back(entry);
4947 void _releaseBufferEntry(const BufferEntry& entry)
4949 CV_Assert(entry.capacity_ != 0);
4950 CV_Assert(entry.clBuffer_ != NULL);
4951 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4952 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4953 Context& ctx = Context::getDefault();
4954 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4955 CV_DbgAssert(svmFns->isValid());
4956 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4957 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4964 template <bool readAccess, bool writeAccess>
4965 class AlignedDataPtr
4969 uchar* const originPtr_;
4970 const size_t alignment_;
4972 uchar* allocatedPtr_;
4975 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
4976 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
4978 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4979 CV_DbgAssert(!readAccess || ptr);
4980 if (((size_t)ptr_ & (alignment - 1)) != 0)
4982 allocatedPtr_ = new uchar[size_ + alignment - 1];
4983 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4986 memcpy(ptr_, originPtr_, size_);
4991 uchar* getAlignedPtr() const
4993 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5003 memcpy(originPtr_, ptr_, size_);
5005 delete[] allocatedPtr_;
5006 allocatedPtr_ = NULL;
5011 AlignedDataPtr(const AlignedDataPtr&); // disabled
5012 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
5015 template <bool readAccess, bool writeAccess>
5016 class AlignedDataPtr2D
5020 uchar* const originPtr_;
5021 const size_t alignment_;
5023 uchar* allocatedPtr_;
5029 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
5030 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
5032 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5033 CV_DbgAssert(!readAccess || ptr != NULL);
5034 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5036 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5037 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5040 for (size_t i = 0; i < rows_; i++)
5041 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
5046 uchar* getAlignedPtr() const
5048 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5058 for (size_t i = 0; i < rows_; i++)
5059 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
5061 delete[] allocatedPtr_;
5062 allocatedPtr_ = NULL;
5067 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5068 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5071 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
5072 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
5076 void Context::Impl::__init_buffer_pools()
5078 bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
5079 OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5080 bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5081 OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5083 size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5084 size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
5085 bufferPool.setMaxReservedSize(poolSize);
5086 size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
5087 bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5089 #ifdef HAVE_OPENCL_SVM
5090 bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5091 OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5092 size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5093 bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5096 CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5099 class OpenCLAllocator CV_FINAL : public MatAllocator
5104 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
5105 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
5106 #ifdef HAVE_OPENCL_SVM
5107 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
5109 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
5114 matStdAllocator = Mat::getDefaultAllocator();
5118 flushCleanupQueue();
5121 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5122 AccessFlag flags, UMatUsageFlags usageFlags) const
5124 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5128 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5130 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
5133 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5135 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
5139 void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5141 const Device& dev = ctx.device(0);
5143 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5144 createFlags |= CL_MEM_ALLOC_HOST_PTR;
5146 if (!isOpenCLCopyingForced() &&
5147 (isOpenCLMapForced() ||
5148 (dev.hostUnifiedMemory()
5155 flags0 = static_cast<UMatData::MemoryFlag>(0);
5157 flags0 = UMatData::COPY_ON_MAP;
5160 UMatData* allocate(int dims, const int* sizes, int type,
5161 void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5164 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5166 flushCleanupQueue();
5168 CV_Assert(data == 0);
5169 size_t total = CV_ELEM_SIZE(type);
5170 for( int i = dims-1; i >= 0; i-- )
5177 Context& ctx = Context::getDefault();
5179 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5180 Context::Impl& ctxImpl = *ctx.getImpl();
5182 int createFlags = 0;
5183 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5184 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5186 void* handle = NULL;
5187 int allocatorFlags = 0;
5189 #ifdef HAVE_OPENCL_SVM
5190 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5191 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5193 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5194 handle = ctxImpl.getBufferPoolSVM().allocate(total);
5196 // this property is constant, so single buffer pool can be used here
5197 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5198 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5202 if (createFlags == 0)
5204 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5205 handle = ctxImpl.getBufferPool().allocate(total);
5207 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5209 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5210 handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
5214 CV_Assert(handle != NULL); // Unsupported, throw
5218 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5220 UMatData* u = new UMatData(this);
5225 u->allocatorFlags_ = allocatorFlags;
5226 u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
5227 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5228 u->markHostCopyObsolete(true);
5229 opencl_allocator_stats.onAllocate(u->size);
5233 bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5238 flushCleanupQueue();
5240 UMatDataAutoLock lock(u);
5244 CV_Assert(u->origdata != 0);
5245 Context& ctx = Context::getDefault();
5246 int createFlags = 0;
5247 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5248 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
5250 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
5252 cl_context ctx_handle = (cl_context)ctx.ptr();
5253 int allocatorFlags = 0;
5254 UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
5255 void* handle = NULL;
5256 cl_int retval = CL_SUCCESS;
5258 #ifdef HAVE_OPENCL_SVM
5259 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5260 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
5261 if (useSVM && svmCaps.isSupportFineGrainSystem())
5263 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
5264 tempUMatFlags = UMatData::TEMP_UMAT;
5265 handle = u->origdata;
5266 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
5268 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
5270 if (!(accessFlags & ACCESS_FAST)) // memcpy used
5272 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5274 cl_svm_mem_flags memFlags = createFlags |
5275 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5277 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5278 CV_DbgAssert(svmFns->isValid());
5280 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
5281 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
5284 cl_command_queue q = NULL;
5285 if (!isFineGrainBuffer)
5287 q = (cl_command_queue)Queue::getDefault().ptr();
5288 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
5289 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
5292 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5295 memcpy(handle, u->origdata, u->size);
5296 if (!isFineGrainBuffer)
5298 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
5299 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
5300 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5303 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
5304 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
5305 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5312 accessFlags &= ~ACCESS_FAST;
5314 tempUMatFlags = UMatData::TEMP_UMAT;
5319 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
5320 // There are OpenCL runtime issues for less aligned data
5321 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
5322 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
5323 // Avoid sharing of host memory between OpenCL buffers
5324 && !(u->originalUMatData && u->originalUMatData->handle)
5327 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
5328 u->size, u->origdata, &retval);
5329 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
5330 (long long int)u->size, u->origdata, (void*)handle).c_str());
5332 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
5334 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
5335 u->size, u->origdata, &retval);
5336 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
5337 (long long int)u->size, u->origdata, (void*)handle).c_str());
5338 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
5341 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
5342 if(!handle || retval != CL_SUCCESS)
5345 u->prevAllocator = u->currAllocator;
5346 u->currAllocator = this;
5347 u->flags |= tempUMatFlags | flags0;
5348 u->allocatorFlags_ = allocatorFlags;
5350 if (!!(accessFlags & ACCESS_WRITE))
5351 u->markHostCopyObsolete(true);
5352 opencl_allocator_stats.onAllocate(u->size);
5356 /*void sync(UMatData* u) const
5358 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5359 UMatDataAutoLock lock(u);
5361 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5363 if( u->tempCopiedUMat() )
5365 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5366 u->size, u->origdata, 0, 0, 0);
5371 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5372 (CL_MAP_READ | CL_MAP_WRITE),
5373 0, u->size, 0, 0, 0, &retval);
5374 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5377 u->markHostCopyObsolete(false);
5379 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5381 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5382 u->size, u->data, 0, 0, 0);
5386 void deallocate(UMatData* u) const CV_OVERRIDE
5391 CV_Assert(u->urefcount == 0);
5392 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5394 CV_Assert(u->handle != 0);
5395 CV_Assert(u->mapcount == 0);
5397 if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5398 addToCleanupQueue(u);
5403 void deallocate_(UMatData* u) const
5406 CV_Assert(u->handle);
5407 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5409 opencl_allocator_stats.onFree(u->size);
5413 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
5414 return; // avoid any OpenCL calls
5418 CV_Assert(u->origdata);
5419 // UMatDataAutoLock lock(u);
5421 if (u->hostCopyObsolete())
5423 #ifdef HAVE_OPENCL_SVM
5424 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5426 Context& ctx = Context::getDefault();
5427 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5428 CV_DbgAssert(svmFns->isValid());
5430 if( u->tempCopiedUMat() )
5432 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5433 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5434 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5435 cl_command_queue q = NULL;
5436 if (!isFineGrainBuffer)
5438 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5439 q = (cl_command_queue)Queue::getDefault().ptr();
5440 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5441 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5444 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5447 memcpy(u->origdata, u->handle, u->size);
5448 if (!isFineGrainBuffer)
5450 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5451 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5452 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5457 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5464 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5465 if( u->tempCopiedUMat() )
5467 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5468 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5469 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5476 CV_Assert(u->mapcount == 0);
5477 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5478 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5479 (CL_MAP_READ | CL_MAP_WRITE),
5480 0, u->size, 0, 0, 0, &retval);
5481 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5482 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5483 if (u->originalUMatData)
5485 CV_Assert(u->originalUMatData->data == data);
5487 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5488 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5489 CV_OCL_DBG_CHECK(clFinish(q));
5493 u->markHostCopyObsolete(false);
5499 #ifdef HAVE_OPENCL_SVM
5500 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5502 if( u->tempCopiedUMat() )
5504 Context& ctx = Context::getDefault();
5505 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5506 CV_DbgAssert(svmFns->isValid());
5508 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5509 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5515 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5516 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5519 u->markDeviceCopyObsolete(true);
5520 u->currAllocator = u->prevAllocator;
5521 u->prevAllocator = NULL;
5522 if(u->data && u->copyOnMap() && u->data != u->origdata)
5524 u->data = u->origdata;
5525 u->currAllocator->deallocate(u);
5530 CV_Assert(u->origdata == NULL);
5531 if(u->data && u->copyOnMap() && u->data != u->origdata)
5535 u->markHostCopyObsolete(true);
5537 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5539 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5541 ocl::Context& ctx = *pCtx.get();
5542 CV_Assert(ctx.getImpl());
5543 ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
5545 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5547 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5549 ocl::Context& ctx = *pCtx.get();
5550 CV_Assert(ctx.getImpl());
5551 ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
5553 #ifdef HAVE_OPENCL_SVM
5554 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5556 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5558 ocl::Context& ctx = *pCtx.get();
5559 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5563 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5564 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5566 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5567 CV_DbgAssert(svmFns->isValid());
5568 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5570 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5572 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5573 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5574 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5577 CV_Assert(ctx.getImpl());
5578 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5583 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5586 u->markDeviceCopyObsolete(true);
5590 CV_Assert(u == NULL);
5593 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
5594 void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5596 CV_Assert(u && u->handle);
5598 if (!!(accessFlags & ACCESS_WRITE))
5599 u->markDeviceCopyObsolete(true);
5601 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5604 if( !u->copyOnMap() )
5607 // because there can be other map requests for the same UMat with different access flags,
5608 // we use the universal (read-write) access mode.
5609 #ifdef HAVE_OPENCL_SVM
5610 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5612 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5614 Context& ctx = Context::getDefault();
5615 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5616 CV_DbgAssert(svmFns->isValid());
5618 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5620 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5621 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5624 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5625 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5629 u->data = (uchar*)u->handle;
5630 u->markHostCopyObsolete(false);
5631 u->markDeviceMemMapped(true);
5636 cl_int retval = CL_SUCCESS;
5637 if (!u->deviceMemMapped())
5639 CV_Assert(u->refcount == 1);
5640 CV_Assert(u->mapcount++ == 0);
5641 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5642 (CL_MAP_READ | CL_MAP_WRITE),
5643 0, u->size, 0, 0, 0, &retval);
5644 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5646 if (u->data && retval == CL_SUCCESS)
5648 u->markHostCopyObsolete(false);
5649 u->markDeviceMemMapped(true);
5653 // TODO Is it really a good idea and was it tested well?
5654 // if map failed, switch to copy-on-map mode for the particular buffer
5655 u->flags |= UMatData::COPY_ON_MAP;
5660 u->data = (uchar*)fastMalloc(u->size);
5661 u->markHostCopyObsolete(true);
5665 if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5667 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5668 #ifdef HAVE_OPENCL_SVM
5669 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5671 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5672 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5673 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5674 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5675 u->markHostCopyObsolete(false);
5679 void unmap(UMatData* u) const CV_OVERRIDE
5685 CV_Assert(u->handle != 0);
5687 UMatDataAutoLock autolock(u);
5689 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5691 if( !u->copyOnMap() && u->deviceMemMapped() )
5693 CV_Assert(u->data != NULL);
5694 #ifdef HAVE_OPENCL_SVM
5695 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5697 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5699 Context& ctx = Context::getDefault();
5700 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5701 CV_DbgAssert(svmFns->isValid());
5703 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5705 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5706 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5708 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5710 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5713 if (u->refcount == 0)
5715 u->markDeviceCopyObsolete(false);
5716 u->markHostCopyObsolete(true);
5720 if (u->refcount == 0)
5722 CV_Assert(u->mapcount-- == 1);
5723 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5724 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5725 if (Device::getDefault().isAMD())
5727 // required for multithreaded applications (see stitching test)
5728 CV_OCL_DBG_CHECK(clFinish(q));
5730 u->markDeviceMemMapped(false);
5732 u->markDeviceCopyObsolete(false);
5733 u->markHostCopyObsolete(true);
5736 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5738 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5739 #ifdef HAVE_OPENCL_SVM
5740 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5742 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5743 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5744 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5745 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5746 u->markDeviceCopyObsolete(false);
5747 u->markHostCopyObsolete(true);
5751 bool checkContinuous(int dims, const size_t sz[],
5752 const size_t srcofs[], const size_t srcstep[],
5753 const size_t dstofs[], const size_t dststep[],
5754 size_t& total, size_t new_sz[],
5755 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5756 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5758 bool iscontinuous = true;
5759 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5760 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5762 for( int i = dims-2; i >= 0; i-- )
5764 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5765 iscontinuous = false;
5768 srcrawofs += srcofs[i]*srcstep[i];
5770 dstrawofs += dstofs[i]*dststep[i];
5775 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5778 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5779 // we assume that new_... arrays are initialized by caller
5780 // with 0's, so there is no else branch
5783 new_srcofs[0] = srcofs[1];
5784 new_srcofs[1] = srcofs[0];
5790 new_dstofs[0] = dstofs[1];
5791 new_dstofs[1] = dstofs[0];
5795 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5796 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5800 // we could check for dims == 3 here,
5801 // but from user perspective this one is more informative
5802 CV_Assert(dims <= 3);
5803 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5806 new_srcofs[0] = srcofs[2];
5807 new_srcofs[1] = srcofs[1];
5808 new_srcofs[2] = srcofs[0];
5813 new_dstofs[0] = dstofs[2];
5814 new_dstofs[1] = dstofs[1];
5815 new_dstofs[2] = dstofs[0];
5818 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5819 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5822 return iscontinuous;
5825 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5826 const size_t srcofs[], const size_t srcstep[],
5827 const size_t dststep[]) const CV_OVERRIDE
5831 UMatDataAutoLock autolock(u);
5833 if( u->data && !u->hostCopyObsolete() )
5835 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5838 CV_Assert( u->handle != 0 );
5840 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5842 size_t total = 0, new_sz[] = {0, 0, 0};
5843 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5844 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5846 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5848 srcrawofs, new_srcofs, new_srcstep,
5849 dstrawofs, new_dstofs, new_dststep);
5851 #ifdef HAVE_OPENCL_SVM
5852 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5854 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5855 Context& ctx = Context::getDefault();
5856 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5857 CV_DbgAssert(svmFns->isValid());
5859 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5860 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5862 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5863 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5866 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5871 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5875 // This code is from MatAllocator::download()
5876 int isz[CV_MAX_DIM];
5877 uchar* srcptr = (uchar*)u->handle;
5878 for( int i = 0; i < dims; i++ )
5880 CV_Assert( sz[i] <= (size_t)INT_MAX );
5884 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5885 isz[i] = (int)sz[i];
5888 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5889 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5891 const Mat* arrays[] = { &src, &dst };
5893 NAryMatIterator it(arrays, ptrs, 2);
5894 size_t j, planesz = it.size;
5896 for( j = 0; j < it.nplanes; j++, ++it )
5897 memcpy(ptrs[1], ptrs[0], planesz);
5899 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5901 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5902 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5904 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5913 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5914 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5915 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5917 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5919 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5920 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5921 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5922 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5923 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5924 uchar* ptr = alignedPtr.getAlignedPtr();
5926 CV_Assert(new_srcstep[0] >= new_sz[0]);
5927 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5928 total = std::min(total, u->size - new_srcrawofs);
5929 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5930 new_srcrawofs, total, ptr, 0, 0, 0));
5931 for( size_t i = 0; i < new_sz[1]; i++ )
5932 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5936 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5937 uchar* ptr = alignedPtr.getAlignedPtr();
5939 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5940 new_srcofs, new_dstofs, new_sz,
5948 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5949 const size_t dstofs[], const size_t dststep[],
5950 const size_t srcstep[]) const CV_OVERRIDE
5955 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5956 CV_Assert(u->refcount == 0 || u->tempUMat());
5958 size_t total = 0, new_sz[] = {0, 0, 0};
5959 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5960 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5962 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5964 srcrawofs, new_srcofs, new_srcstep,
5965 dstrawofs, new_dstofs, new_dststep);
5967 UMatDataAutoLock autolock(u);
5969 // if there is cached CPU copy of the GPU matrix,
5970 // we could use it as a destination.
5971 // we can do it in 2 cases:
5972 // 1. we overwrite the whole content
5973 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
5974 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
5976 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
5977 u->markHostCopyObsolete(false);
5978 u->markDeviceCopyObsolete(true);
5982 CV_Assert( u->handle != 0 );
5983 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5985 #ifdef HAVE_OPENCL_SVM
5986 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5988 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5989 Context& ctx = Context::getDefault();
5990 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5991 CV_DbgAssert(svmFns->isValid());
5993 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5994 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5996 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5997 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
6000 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6005 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
6009 // This code is from MatAllocator::upload()
6010 int isz[CV_MAX_DIM];
6011 uchar* dstptr = (uchar*)u->handle;
6012 for( int i = 0; i < dims; i++ )
6014 CV_Assert( sz[i] <= (size_t)INT_MAX );
6018 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6019 isz[i] = (int)sz[i];
6022 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
6023 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6025 const Mat* arrays[] = { &src, &dst };
6027 NAryMatIterator it(arrays, ptrs, 2);
6028 size_t j, planesz = it.size;
6030 for( j = 0; j < it.nplanes; j++, ++it )
6031 memcpy(ptrs[1], ptrs[0], planesz);
6033 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6035 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6036 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6038 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6047 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6048 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6049 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6050 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6051 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6053 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6055 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6056 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6057 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6058 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6059 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6060 uchar* ptr = alignedPtr.getAlignedPtr();
6062 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6063 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
6064 total = std::min(total, u->size - new_dstrawofs);
6065 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6066 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6067 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6068 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6069 new_dstrawofs, total, ptr, 0, 0, 0));
6070 for( size_t i = 0; i < new_sz[1]; i++ )
6071 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
6072 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6073 new_dstrawofs, total, ptr, 0, 0, 0));
6077 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6078 uchar* ptr = alignedPtr.getAlignedPtr();
6080 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6081 new_dstofs, new_srcofs, new_sz,
6087 u->markHostCopyObsolete(true);
6088 #ifdef HAVE_OPENCL_SVM
6089 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6090 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6097 u->markHostCopyObsolete(true);
6099 u->markDeviceCopyObsolete(false);
6102 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6103 const size_t srcofs[], const size_t srcstep[],
6104 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6109 size_t total = 0, new_sz[] = {0, 0, 0};
6110 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6111 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6113 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6115 srcrawofs, new_srcofs, new_srcstep,
6116 dstrawofs, new_dstofs, new_dststep);
6118 UMatDataAutoLock src_autolock(src, dst);
6120 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6122 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6125 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6127 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6128 dst->markHostCopyObsolete(false);
6129 #ifdef HAVE_OPENCL_SVM
6130 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6131 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6138 dst->markDeviceCopyObsolete(true);
6143 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6144 CV_Assert(dst->refcount == 0);
6145 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6147 cl_int retval = CL_SUCCESS;
6148 #ifdef HAVE_OPENCL_SVM
6149 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6150 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6152 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6153 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6155 Context& ctx = Context::getDefault();
6156 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6157 CV_DbgAssert(svmFns->isValid());
6161 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6162 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6163 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6164 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6165 total, 0, NULL, NULL);
6166 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6171 // This code is from MatAllocator::download()/upload()
6172 int isz[CV_MAX_DIM];
6173 uchar* srcptr = (uchar*)src->handle;
6174 for( int i = 0; i < dims; i++ )
6176 CV_Assert( sz[i] <= (size_t)INT_MAX );
6180 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6181 isz[i] = (int)sz[i];
6183 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6185 uchar* dstptr = (uchar*)dst->handle;
6186 for( int i = 0; i < dims; i++ )
6189 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6191 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6193 const Mat* arrays[] = { &m_src, &m_dst };
6195 NAryMatIterator it(arrays, ptrs, 2);
6196 size_t j, planesz = it.size;
6198 for( j = 0; j < it.nplanes; j++, ++it )
6199 memcpy(ptrs[1], ptrs[0], planesz);
6204 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6206 map(src, ACCESS_READ);
6207 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6212 map(dst, ACCESS_WRITE);
6213 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6223 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6224 srcrawofs, dstrawofs, total, 0, 0, 0);
6225 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6226 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6228 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6230 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6231 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6232 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6233 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6234 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6236 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6237 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6238 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6239 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6240 uchar* srcptr = srcBuf.getAlignedPtr();
6241 uchar* dstptr = dstBuf.getAlignedPtr();
6243 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6245 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
6246 src_total = std::min(src_total, src->size - new_srcrawofs);
6247 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
6248 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
6250 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6251 new_srcrawofs, src_total, srcptr, 0, 0, 0));
6252 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6253 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6255 for( size_t i = 0; i < new_sz[1]; i++ )
6256 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
6257 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
6258 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6259 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6263 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6264 new_srcofs, new_dstofs, new_sz,
6270 if (retval == CL_SUCCESS)
6272 CV_IMPL_ADD(CV_IMPL_OCL)
6275 #ifdef HAVE_OPENCL_SVM
6276 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6277 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6284 dst->markHostCopyObsolete(true);
6286 dst->markDeviceCopyObsolete(false);
6290 CV_OCL_DBG_CHECK(clFinish(q));
6294 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6296 ocl::Context ctx = Context::getDefault();
6299 #ifdef HAVE_OPENCL_SVM
6300 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6302 return &ctx.getImpl()->getBufferPoolSVM();
6305 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
6307 return &ctx.getImpl()->getBufferPoolHostPtr();
6309 if (id != NULL && strcmp(id, "OCL") != 0)
6311 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6313 return &ctx.getImpl()->getBufferPool();
6316 MatAllocator* matStdAllocator;
6318 mutable cv::Mutex cleanupQueueMutex;
6319 mutable std::deque<UMatData*> cleanupQueue;
6321 void flushCleanupQueue() const
6323 if (!cleanupQueue.empty())
6325 std::deque<UMatData*> q;
6327 cv::AutoLock lock(cleanupQueueMutex);
6328 q.swap(cleanupQueue);
6330 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6336 void addToCleanupQueue(UMatData* u) const
6338 //TODO: Validation check: CV_Assert(!u->tempUMat());
6340 cv::AutoLock lock(cleanupQueueMutex);
6341 cleanupQueue.push_back(u);
6346 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6348 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6349 g_isOpenCVActivated = true;
6352 MatAllocator* getOpenCLAllocator()
6354 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
6357 }} // namespace cv::ocl
6362 // three funcs below are implemented in umatrix.cpp
6363 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
6364 bool autoSteps = false );
6365 void finalizeHdr(UMat& m);
6370 namespace cv { namespace ocl {
6373 // Convert OpenCL buffer memory to UMat
6375 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6378 int sizes[] = { rows, cols };
6380 CV_Assert(0 <= d && d <= CV_MAX_DIM);
6384 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6385 dst.usageFlags = USAGE_DEFAULT;
6387 setSize(dst, d, sizes, 0, true);
6390 cl_mem memobj = (cl_mem)cl_mem_buffer;
6391 cl_mem_object_type mem_type = 0;
6393 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6395 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6398 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6400 CV_OCL_CHECK(clRetainMemObject(memobj));
6402 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6403 CV_Assert(total >= rows * step);
6405 // attach clBuffer to UMatData
6406 dst.u = new UMatData(getOpenCLAllocator());
6408 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
6409 dst.u->flags = static_cast<UMatData::MemoryFlag>(0);
6410 dst.u->handle = cl_mem_buffer;
6411 dst.u->origdata = 0;
6412 dst.u->prevAllocator = 0;
6413 dst.u->size = total;
6419 } // convertFromBuffer()
6423 // Convert OpenCL image2d_t memory to UMat
6425 void convertFromImage(void* cl_mem_image, UMat& dst)
6427 cl_mem clImage = (cl_mem)cl_mem_image;
6428 cl_mem_object_type mem_type = 0;
6430 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6432 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6434 cl_image_format fmt = { 0, 0 };
6435 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6438 switch (fmt.image_channel_data_type)
6441 case CL_UNSIGNED_INT8:
6446 case CL_SIGNED_INT8:
6450 case CL_UNORM_INT16:
6451 case CL_UNSIGNED_INT16:
6455 case CL_SNORM_INT16:
6456 case CL_SIGNED_INT16:
6460 case CL_SIGNED_INT32:
6469 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6473 switch (fmt.image_channel_order)
6476 type = CV_MAKE_TYPE(depth, 1);
6482 type = CV_MAKE_TYPE(depth, 4);
6486 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6491 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6494 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6497 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6499 dst.create((int)h, (int)w, type);
6501 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
6503 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6506 size_t src_origin[3] = { 0, 0, 0 };
6507 size_t region[3] = { w, h, 1 };
6508 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6510 CV_OCL_CHECK(clFinish(q));
6513 } // convertFromImage()
6516 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6518 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6520 cl_uint numDevices = 0;
6521 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6522 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6524 CV_OCL_DBG_CHECK_RESULT(status,
6525 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6528 if (numDevices == 0)
6534 devices.resize((size_t)numDevices);
6535 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6538 struct PlatformInfo::Impl
6543 handle = *(cl_platform_id*)id;
6544 getDevices(devices, handle);
6547 String getStrProp(cl_platform_info prop) const
6551 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6552 sz < sizeof(buf) ? String(buf) : String();
6555 IMPLEMENT_REFCOUNTABLE();
6556 std::vector<cl_device_id> devices;
6557 cl_platform_id handle;
6560 PlatformInfo::PlatformInfo()
6565 PlatformInfo::PlatformInfo(void* platform_id)
6567 p = new Impl(platform_id);
6570 PlatformInfo::~PlatformInfo()
6576 PlatformInfo::PlatformInfo(const PlatformInfo& i)
6583 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6596 int PlatformInfo::deviceNumber() const
6598 return p ? (int)p->devices.size() : 0;
6601 void PlatformInfo::getDevice(Device& device, int d) const
6603 CV_Assert(p && d < (int)p->devices.size() );
6605 device.set(p->devices[d]);
6608 String PlatformInfo::name() const
6610 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6613 String PlatformInfo::vendor() const
6615 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6618 String PlatformInfo::version() const
6620 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
6623 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6625 cl_uint numPlatforms = 0;
6626 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6628 if (numPlatforms == 0)
6634 platforms.resize((size_t)numPlatforms);
6635 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6638 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6640 std::vector<cl_platform_id> platforms;
6641 getPlatforms(platforms);
6643 for (size_t i = 0; i < platforms.size(); i++)
6644 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6647 const char* typeToStr(int type)
6649 static const char* tab[]=
6651 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6652 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6653 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6654 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6655 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6656 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6657 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6658 "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6661 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6662 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6667 const char* memopTypeToStr(int type)
6669 static const char* tab[] =
6671 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6672 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6673 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6674 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6675 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6676 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6677 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6678 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6681 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6682 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6687 const char* vecopTypeToStr(int type)
6689 static const char* tab[] =
6691 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6692 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6693 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6694 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6695 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6696 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6697 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6698 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6701 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6702 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6707 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6709 if( sdepth == ddepth )
6711 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6712 if( ddepth >= CV_32F ||
6713 (ddepth == CV_32S && sdepth < CV_32S) ||
6714 (ddepth == CV_16S && sdepth <= CV_8S) ||
6715 (ddepth == CV_16U && sdepth == CV_8U))
6717 sprintf(buf, "convert_%s", typestr);
6719 else if( sdepth >= CV_32F )
6720 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6722 sprintf(buf, "convert_%s_sat", typestr);
6727 const char* getOpenCLErrorString(int errorCode)
6729 #define CV_OCL_CODE(id) case id: return #id
6730 #define CV_OCL_CODE_(id, name) case id: return #name
6733 CV_OCL_CODE(CL_SUCCESS);
6734 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6735 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6736 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6737 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6738 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6739 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6740 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6741 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6742 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6743 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6744 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6745 CV_OCL_CODE(CL_MAP_FAILURE);
6746 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6747 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6748 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6749 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6750 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6751 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6752 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6753 CV_OCL_CODE(CL_INVALID_VALUE);
6754 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6755 CV_OCL_CODE(CL_INVALID_PLATFORM);
6756 CV_OCL_CODE(CL_INVALID_DEVICE);
6757 CV_OCL_CODE(CL_INVALID_CONTEXT);
6758 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6759 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6760 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6761 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6762 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6763 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6764 CV_OCL_CODE(CL_INVALID_SAMPLER);
6765 CV_OCL_CODE(CL_INVALID_BINARY);
6766 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6767 CV_OCL_CODE(CL_INVALID_PROGRAM);
6768 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6769 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6770 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6771 CV_OCL_CODE(CL_INVALID_KERNEL);
6772 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6773 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6774 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6775 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6776 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6777 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6778 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6779 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6780 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6781 CV_OCL_CODE(CL_INVALID_EVENT);
6782 CV_OCL_CODE(CL_INVALID_OPERATION);
6783 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6784 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6785 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6786 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6788 CV_OCL_CODE(CL_INVALID_PROPERTY);
6790 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6791 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6792 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6793 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6795 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6796 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6798 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6799 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6800 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6801 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6802 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6803 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6804 default: return "Unknown OpenCL error";
6810 template <typename T>
6811 static std::string kerToStr(const Mat & k)
6813 int width = k.cols - 1, depth = k.depth();
6814 const T * const data = k.ptr<T>();
6816 std::ostringstream stream;
6817 stream.precision(10);
6821 for (int i = 0; i < width; ++i)
6822 stream << "DIG(" << (int)data[i] << ")";
6823 stream << "DIG(" << (int)data[width] << ")";
6825 else if (depth == CV_32F)
6827 stream.setf(std::ios_base::showpoint);
6828 for (int i = 0; i < width; ++i)
6829 stream << "DIG(" << data[i] << "f)";
6830 stream << "DIG(" << data[width] << "f)";
6834 for (int i = 0; i < width; ++i)
6835 stream << "DIG(" << data[i] << ")";
6836 stream << "DIG(" << data[width] << ")";
6839 return stream.str();
6842 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6844 Mat kernel = _kernel.getMat().reshape(1, 1);
6846 int depth = kernel.depth();
6850 if (ddepth != depth)
6851 kernel.convertTo(kernel, ddepth);
6853 typedef std::string (* func_t)(const Mat &);
6854 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6855 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6856 const func_t func = funcs[ddepth];
6857 CV_Assert(func != 0);
6859 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6862 #define PROCESS_SRC(src) \
6867 CV_Assert(src.isMat() || src.isUMat()); \
6868 Size csize = src.size(); \
6869 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6870 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6871 if (cwidth < ckercn || ckercn <= 0) \
6873 cols.push_back(cwidth); \
6874 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6876 offsets.push_back(src.offset()); \
6877 steps.push_back(src.step()); \
6878 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6879 kercns.push_back(ckercn); \
6884 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6885 InputArray src4, InputArray src5, InputArray src6,
6886 InputArray src7, InputArray src8, InputArray src9,
6887 OclVectorStrategy strat)
6889 const ocl::Device & d = ocl::Device::getDefault();
6891 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6892 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6893 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6894 d.preferredVectorWidthDouble(), -1 };
6896 // if the device says don't use vectors
6897 if (vectorWidths[0] == 1)
6900 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6901 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6902 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6905 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6908 int checkOptimalVectorWidth(const int *vectorWidths,
6909 InputArray src1, InputArray src2, InputArray src3,
6910 InputArray src4, InputArray src5, InputArray src6,
6911 InputArray src7, InputArray src8, InputArray src9,
6912 OclVectorStrategy strat)
6914 CV_Assert(vectorWidths);
6916 int ref_type = src1.type();
6918 std::vector<size_t> offsets, steps, cols;
6919 std::vector<int> dividers, kercns;
6930 size_t size = offsets.size();
6932 for (size_t i = 0; i < size; ++i)
6933 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6934 dividers[i] >>= 1, kercns[i] >>= 1;
6937 int kercn = *std::min_element(kercns.begin(), kercns.end());
6942 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6943 InputArray src4, InputArray src5, InputArray src6,
6944 InputArray src7, InputArray src8, InputArray src9)
6946 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
6952 // TODO Make this as a method of OpenCL "BuildOptions" class
6953 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
6955 if (!buildOptions.empty())
6956 buildOptions += " ";
6957 int type = _m.type(), depth = CV_MAT_DEPTH(type);
6958 buildOptions += format(
6959 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
6960 name.c_str(), ocl::typeToStr(type),
6961 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
6962 name.c_str(), (int)CV_MAT_CN(type),
6963 name.c_str(), (int)CV_ELEM_SIZE(type),
6964 name.c_str(), (int)CV_ELEM_SIZE1(type),
6965 name.c_str(), (int)depth
6970 struct Image2D::Impl
6972 Impl(const UMat &src, bool norm, bool alias)
6976 init(src, norm, alias);
6982 clReleaseMemObject(handle);
6985 static cl_image_format getImageFormat(int depth, int cn, bool norm)
6987 cl_image_format format;
6988 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
6989 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
6990 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
6991 CL_SNORM_INT16, -1, -1, -1, -1 };
6992 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
6994 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
6995 int channelOrder = channelOrders[cn];
6996 format.image_channel_data_type = (cl_channel_type)channelType;
6997 format.image_channel_order = (cl_channel_order)channelOrder;
7001 static bool isFormatSupported(cl_image_format format)
7004 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7006 cl_context context = (cl_context)Context::getDefault().ptr();
7010 // Figure out how many formats are supported by this context.
7011 cl_uint numFormats = 0;
7012 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7013 CL_MEM_OBJECT_IMAGE2D, numFormats,
7015 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
7018 AutoBuffer<cl_image_format> formats(numFormats);
7019 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7020 CL_MEM_OBJECT_IMAGE2D, numFormats,
7021 formats.data(), NULL);
7022 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
7023 for (cl_uint i = 0; i < numFormats; ++i)
7025 if (!memcmp(&formats[i], &format, sizeof(format)))
7034 void init(const UMat &src, bool norm, bool alias)
7037 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7039 CV_Assert(!src.empty());
7040 CV_Assert(ocl::Device::getDefault().imageSupport());
7042 int err, depth = src.depth(), cn = src.channels();
7044 cl_image_format format = getImageFormat(depth, cn, norm);
7046 if (!isFormatSupported(format))
7047 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7049 if (alias && !src.handle(ACCESS_RW))
7050 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7052 cl_context context = (cl_context)Context::getDefault().ptr();
7053 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7055 #ifdef CL_VERSION_1_2
7056 // this enables backwards portability to
7057 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7058 const Device & d = ocl::Device::getDefault();
7059 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7060 CV_Assert(!alias || canCreateAlias(src));
7061 if (1 < major || (1 == major && 2 <= minor))
7064 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
7065 desc.image_width = src.cols;
7066 desc.image_height = src.rows;
7067 desc.image_depth = 0;
7068 desc.image_array_size = 1;
7069 desc.image_row_pitch = alias ? src.step[0] : 0;
7070 desc.image_slice_pitch = 0;
7071 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
7072 desc.num_mip_levels = 0;
7073 desc.num_samples = 0;
7074 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7079 CV_SUPPRESS_DEPRECATED_START
7080 CV_Assert(!alias); // This is an OpenCL 1.2 extension
7081 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7082 CV_SUPPRESS_DEPRECATED_END
7084 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7086 size_t origin[] = { 0, 0, 0 };
7087 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7090 if (!alias && !src.isContinuous())
7092 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7093 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7094 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7097 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7098 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7099 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7100 CV_OCL_DBG_CHECK(clFlush(queue));
7104 devData = (cl_mem)src.handle(ACCESS_READ);
7106 CV_Assert(devData != NULL);
7110 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7111 if (!src.isContinuous())
7113 CV_OCL_DBG_CHECK(clFlush(queue));
7114 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7119 IMPLEMENT_REFCOUNTABLE();
7129 Image2D::Image2D(const UMat &src, bool norm, bool alias)
7131 p = new Impl(src, norm, alias);
7134 bool Image2D::canCreateAlias(const UMat &m)
7137 const Device & d = ocl::Device::getDefault();
7138 if (d.imageFromBufferSupport() && !m.empty())
7140 // This is the required pitch alignment in pixels
7141 uint pitchAlign = d.imagePitchAlignment();
7142 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7144 // We don't currently handle the case where the buffer was created
7145 // with CL_MEM_USE_HOST_PTR
7146 if (!m.u->tempUMat())
7155 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7157 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7159 return Impl::isFormatSupported(format);
7162 Image2D::Image2D(const Image2D & i)
7169 Image2D & Image2D::operator = (const Image2D & i)
7188 void* Image2D::ptr() const
7190 return p ? p->handle : 0;
7193 bool internal::isOpenCLForced()
7195 static bool initialized = false;
7196 static bool value = false;
7199 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
7205 bool internal::isPerformanceCheckBypassed()
7207 static bool initialized = false;
7208 static bool value = false;
7211 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
7217 bool internal::isCLBuffer(UMat& u)
7219 void* h = u.handle(ACCESS_RW);
7222 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7224 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7227 cl_mem_object_type type = 0;
7228 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7229 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7239 Impl(const Queue& q)
7248 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7254 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7258 uint64 durationNS() const
7260 return (uint64)(timer.getTimeSec() * 1e9);
7266 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
7267 Timer::~Timer() { delete p; }
7281 uint64 Timer::durationNS() const
7284 return p->durationNS();
7289 #endif // HAVE_OPENCL