1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
122 #include "umatrix.hpp"
124 namespace cv { namespace ocl {
126 #define IMPLEMENT_REFCOUNTABLE() \
127 void addref() { CV_XADD(&refcount, 1); } \
128 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
136 return opencl_allocator_stats;
140 static bool isRaiseError()
142 static bool initialized = false;
143 static bool value = false;
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
153 #if CV_OPENCL_TRACE_CHECK
155 void traceOpenCLCheck(cl_int status, const char* message)
157 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
164 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
165 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
167 #define CV_OCL_CHECK_RESULT(check_result, msg) \
169 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
170 if (check_result != CL_SUCCESS) \
172 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
173 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
174 CV_Error(Error::OpenCLApiCallError, error_msg); \
178 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
180 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
183 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
184 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
185 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
189 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
190 if (check_result != CL_SUCCESS && isRaiseError()) \
192 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
193 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
194 CV_Error(Error::OpenCLApiCallError, error_msg); \
197 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
198 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
202 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
203 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
204 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
205 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
207 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
208 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
211 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
212 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
220 static const String getBuildExtraOptions()
222 static String param_buildExtraOptions;
223 static bool initialized = false;
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
231 return param_buildExtraOptions;
234 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
235 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
240 UMat2D(const UMat& m)
242 offset = (int)m.offset;
255 UMat3D(const UMat& m)
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
275 static uint64 table[256];
276 static bool initialized = false;
280 for( int i = 0; i < 256; i++ )
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
308 OpenCLBinaryCacheConfigurator()
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
325 if (cache_path_.empty())
327 if (cache_path_ == "disabled")
329 if (!utils::fs::createDirectories(cache_path_))
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
359 catch (const cv::Exception& e)
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
370 if (CV_OPENCL_CACHE_WRITE)
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
381 catch (const cv::Exception& e)
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
387 if (!cache_path_.empty())
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
404 cache_lock_filename_.clear();
405 cache_lock_.release();
408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
411 if (cache_path_.empty())
412 return std::string();
414 AutoLock lock(mutex_prepared_contexts_);
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
438 catch (const cv::Exception& e)
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
463 if (!remove_entries.empty())
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
476 for (size_t i = 0; i < remove_entries.size(); i++)
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
485 catch (const cv::Exception& e)
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
507 class BinaryProgramFile
509 enum { MAX_ENTRIES = 64 };
511 typedef unsigned int uint32_t;
513 struct CV_DECL_ALIGNED(4) FileHeader
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
519 struct CV_DECL_ALIGNED(4) FileTable
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
525 struct CV_DECL_ALIGNED(4) FileEntry
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
540 uint32_t entryOffsets[MAX_ENTRIES];
542 uint32_t getHash(const std::string& options)
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
548 inline size_t getFileSize()
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
556 inline uint32_t readUInt32()
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
563 inline void writeUInt32(const uint32_t value)
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
570 inline void seekReadAbsolute(size_t pos)
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
575 inline void seekReadRelative(size_t pos)
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
581 inline void seekWriteAbsolute(size_t pos)
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
608 bool isValid = false;
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
630 catch (const cv::Exception& e)
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
649 bool read(const std::string& key, std::vector<char>& buf)
654 size_t fileSize = getFileSize();
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
679 uint32_t entryNum = getHash(key);
681 uint32_t entryOffset = entryOffsets[entryNum];
683 while (entryOffset > 0)
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
692 if (entry.keySize > 0)
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
707 if (entry.nextEntryFileOffset == 0)
709 entryOffset = entry.nextEntryFileOffset;
714 bool write(const std::string& key, std::vector<char>& buf)
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
730 size_t fileSize = getFileSize();
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
744 CV_Assert(!f.fail());
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
769 uint32_t entryNum = getHash(key);
771 uint32_t entryOffset = entryOffsets[entryNum];
773 while (entryOffset > 0)
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
782 if (entry.keySize > 0)
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
794 if (entry.nextEntryFileOffset == 0)
796 entryOffset = entry.nextEntryFileOffset;
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
833 struct OpenCLExecutionContext::Impl
835 ocl::Context context_;
836 int device_; // device index in context
843 void _init_device(cl_device_id deviceID)
846 int ndevices = (int)context_.ndevices();
847 CV_Assert(ndevices > 0);
849 for (int i = 0; i < ndevices; i++)
851 ocl::Device d = context_.device(i);
852 cl_device_id dhandle = (cl_device_id)d.ptr();
853 if (dhandle == deviceID)
860 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
863 void _init_device(const ocl::Device& device)
865 CV_Assert(device.ptr());
866 int ndevices = (int)context_.ndevices();
867 CV_Assert(ndevices > 0);
869 for (int i = 0; i < ndevices; i++)
871 ocl::Device d = context_.device(i);
872 if (d.getImpl() == device.getImpl())
879 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
883 Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
884 : device_(0), useOpenCL_(-1)
886 CV_UNUSED(platformID);
890 context_ = Context::fromHandle(context);
891 _init_device(deviceID);
892 queue_ = Queue(context_, context_.device(device_));
895 Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
896 : device_(0), useOpenCL_(-1)
898 CV_Assert(context.ptr());
899 CV_Assert(device.ptr());
902 _init_device(device);
906 Impl(const ocl::Context& context, const ocl::Device& device)
907 : device_(0), useOpenCL_(-1)
909 CV_Assert(context.ptr());
910 CV_Assert(device.ptr());
913 _init_device(device);
914 queue_ = Queue(context_, context_.device(device_));
917 Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
925 Impl(const Impl& other)
926 : context_(other.context_)
927 , device_(other.device_)
928 , queue_(other.queue_)
934 inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
942 if (!context_.empty() && context_.ndevices() > 0)
944 const Device& d = context_.device(device_);
945 useOpenCL_ = d.available();
948 catch (const cv::Exception&)
953 CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
955 return useOpenCL_ > 0;
958 void setUseOpenCL(bool flag)
966 static const std::shared_ptr<Impl>& getInitializedExecutionContext()
970 CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
972 static bool initialized = false;
973 static std::shared_ptr<Impl> g_primaryExecutionContext;
977 cv::AutoLock lock(getInitializationMutex());
980 CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
983 Context c = ocl::Context::create(std::string());
987 auto& d = c.device(deviceId);
990 auto q = ocl::Queue(c, d);
993 CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
997 g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
998 CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
1003 CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
1008 CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
1011 catch (const std::exception& e)
1013 CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
1017 CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
1022 return g_primaryExecutionContext;
1026 Context& OpenCLExecutionContext::getContext() const
1031 Device& OpenCLExecutionContext::getDevice() const
1034 return p->context_.device(p->device_);
1036 Queue& OpenCLExecutionContext::getQueue() const
1042 bool OpenCLExecutionContext::useOpenCL() const
1045 return p->useOpenCL();
1048 void OpenCLExecutionContext::setUseOpenCL(bool flag)
1051 p->setUseOpenCL(flag);
1055 OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1057 CV_TRACE_FUNCTION();
1058 CoreTLSData& data = getCoreTlsData();
1059 OpenCLExecutionContext& c = data.oclExecutionContext;
1060 if (!data.oclExecutionContextInitialized)
1062 data.oclExecutionContextInitialized = true;
1063 if (c.empty() && haveOpenCL())
1064 c.p = Impl::getInitializedExecutionContext();
1070 OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1072 CV_TRACE_FUNCTION();
1073 CoreTLSData& data = getCoreTlsData();
1074 OpenCLExecutionContext& c = data.oclExecutionContext;
1078 void OpenCLExecutionContext::bind() const
1080 CV_TRACE_FUNCTION();
1082 CoreTLSData& data = getCoreTlsData();
1083 data.oclExecutionContext = *this;
1084 data.oclExecutionContextInitialized = true;
1085 data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL()
1089 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1091 CV_TRACE_FUNCTION();
1093 const Queue q(getContext(), getDevice());
1094 return cloneWithNewQueue(q);
1097 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1099 CV_TRACE_FUNCTION();
1101 CV_Assert(q.ptr() != NULL);
1102 OpenCLExecutionContext c;
1103 c.p = std::make_shared<Impl>(p->context_, p->device_, q);
1108 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1110 CV_TRACE_FUNCTION();
1112 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1114 CV_Assert(!context.empty());
1115 CV_Assert(context.ptr());
1116 CV_Assert(!device.empty());
1117 CV_Assert(device.ptr());
1118 OpenCLExecutionContext ctx;
1119 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
1125 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1127 CV_TRACE_FUNCTION();
1129 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1131 CV_Assert(!context.empty());
1132 CV_Assert(context.ptr());
1133 CV_Assert(!device.empty());
1134 CV_Assert(device.ptr());
1135 OpenCLExecutionContext ctx;
1136 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
1141 void OpenCLExecutionContext::release()
1143 CV_TRACE_FUNCTION();
1148 // true if we have initialized OpenCL subsystem with available platforms
1149 static bool g_isOpenCVActivated = false;
1153 CV_TRACE_FUNCTION();
1154 static bool g_isOpenCLInitialized = false;
1155 static bool g_isOpenCLAvailable = false;
1157 if (!g_isOpenCLInitialized)
1159 CV_TRACE_REGION("Init_OpenCL_Runtime");
1160 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
1163 if (cv::String(envPath) == "disabled")
1165 g_isOpenCLAvailable = false;
1166 g_isOpenCLInitialized = true;
1171 cv::AutoLock lock(getInitializationMutex());
1172 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1176 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1177 g_isOpenCVActivated = n > 0;
1178 CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1182 g_isOpenCLAvailable = false;
1184 g_isOpenCLInitialized = true;
1186 return g_isOpenCLAvailable;
1191 CoreTLSData& data = getCoreTlsData();
1192 if (data.useOpenCL < 0)
1199 auto c = OpenCLExecutionContext::getCurrent();
1200 data.useOpenCL = c.useOpenCL();
1205 CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1208 return data.useOpenCL > 0;
1211 bool isOpenCLActivated()
1213 if (!g_isOpenCVActivated)
1214 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1218 void setUseOpenCL(bool flag)
1220 CV_TRACE_FUNCTION();
1222 CoreTLSData& data = getCoreTlsData();
1223 auto& c = OpenCLExecutionContext::getCurrentRef();
1226 c.setUseOpenCL(flag);
1227 data.useOpenCL = c.useOpenCL();
1234 data.useOpenCL = -1; // enabled by default (if context is not initialized)
1240 #ifdef HAVE_CLAMDBLAS
1245 static AmdBlasHelper & getInstance()
1247 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
1250 bool isAvailable() const
1252 return g_isAmdBlasAvailable;
1259 clAmdBlasTeardown();
1267 if (!g_isAmdBlasInitialized)
1269 AutoLock lock(getInitializationMutex());
1271 if (!g_isAmdBlasInitialized)
1277 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1281 g_isAmdBlasAvailable = false;
1285 g_isAmdBlasAvailable = false;
1287 g_isAmdBlasInitialized = true;
1293 static bool g_isAmdBlasInitialized;
1294 static bool g_isAmdBlasAvailable;
1297 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1298 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1302 return AmdBlasHelper::getInstance().isAvailable();
1314 #ifdef HAVE_CLAMDFFT
1319 static AmdFftHelper & getInstance()
1321 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
1324 bool isAvailable() const
1326 return g_isAmdFftAvailable;
1333 // clAmdFftTeardown();
1341 if (!g_isAmdFftInitialized)
1343 AutoLock lock(getInitializationMutex());
1345 if (!g_isAmdFftInitialized)
1351 cl_uint major, minor, patch;
1352 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1354 // it throws exception in case AmdFft binaries are not found
1355 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1356 g_isAmdFftAvailable = true;
1358 catch (const Exception &)
1360 g_isAmdFftAvailable = false;
1364 g_isAmdFftAvailable = false;
1366 g_isAmdFftInitialized = true;
1372 static clAmdFftSetupData setupData;
1373 static bool g_isAmdFftInitialized;
1374 static bool g_isAmdFftAvailable;
1377 clAmdFftSetupData AmdFftHelper::setupData;
1378 bool AmdFftHelper::g_isAmdFftAvailable = false;
1379 bool AmdFftHelper::g_isAmdFftInitialized = false;
1383 return AmdFftHelper::getInstance().isAvailable();
1397 #ifdef HAVE_OPENCL_SVM
1406 Queue::getDefault().finish();
1409 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1411 struct Platform::Impl
1417 initialized = false;
1426 //cl_uint num_entries
1428 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1434 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1436 vendor = String(buf);
1443 IMPLEMENT_REFCOUNTABLE();
1445 cl_platform_id handle;
1450 Platform::Platform()
1455 Platform::~Platform()
1461 Platform::Platform(const Platform& pl)
1468 Platform& Platform::operator = (const Platform& pl)
1470 Impl* newp = (Impl*)pl.p;
1479 void* Platform::ptr() const
1481 return p ? p->handle : 0;
1484 Platform& Platform::getDefault()
1486 CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1496 /////////////////////////////////////// Device ////////////////////////////////////////////
1498 // deviceVersion has format
1499 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1501 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1502 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1503 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1506 if (10 >= deviceVersion.length())
1508 const char *pstr = deviceVersion.c_str();
1509 if (0 != strncmp(pstr, "OpenCL ", 7))
1511 size_t ppos = deviceVersion.find('.', 7);
1512 if (String::npos == ppos)
1514 String temp = deviceVersion.substr(7, ppos - 7);
1515 major = atoi(temp.c_str());
1516 temp = deviceVersion.substr(ppos + 1);
1517 minor = atoi(temp.c_str());
1528 cl_device_id device = (cl_device_id)d;
1530 CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only
1538 void _init(cl_device_id d)
1540 handle = (cl_device_id)d;
1542 name_ = getStrProp(CL_DEVICE_NAME);
1543 version_ = getStrProp(CL_DEVICE_VERSION);
1544 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1545 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1546 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1547 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1548 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1549 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1550 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1551 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1553 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1554 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1557 while (pos < extensions_.size())
1559 size_t pos2 = extensions_.find(' ', pos);
1560 if (pos2 == String::npos)
1561 pos2 = extensions_.size();
1564 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1565 extensions_set_.insert(extensionName);
1570 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1572 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1573 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1574 vendorName_ == "AMD")
1575 vendorID_ = VENDOR_AMD;
1576 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1577 vendorID_ = VENDOR_INTEL;
1578 else if (vendorName_ == "NVIDIA Corporation")
1579 vendorID_ = VENDOR_NVIDIA;
1581 vendorID_ = UNKNOWN_VENDOR;
1583 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1584 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1586 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1587 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1588 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1589 maxWorkGroupSize_ = new_maxWorkGroupSize;
1592 if (isExtensionSupported("cl_khr_spir"))
1594 #ifndef CL_DEVICE_SPIR_VERSIONS
1595 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1597 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1598 std::cout << spir_versions << std::endl;
1606 if (!cv::__termination)
1611 CV_OCL_CHECK(clReleaseDevice(handle));
1617 template<typename _TpCL, typename _TpOut>
1618 _TpOut getProp(cl_device_info prop) const
1623 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1624 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1627 bool getBoolProp(cl_device_info prop) const
1629 cl_bool temp = CL_FALSE;
1632 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1633 sz == sizeof(temp) ? temp != 0 : false;
1636 String getStrProp(cl_device_info prop) const
1640 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1641 sz < sizeof(buf) ? String(buf) : String();
1644 bool isExtensionSupported(const std::string& extensionName) const
1646 return extensions_set_.count(extensionName) > 0;
1650 IMPLEMENT_REFCOUNTABLE();
1652 cl_device_id handle;
1656 std::string extensions_;
1657 int doubleFPConfig_;
1658 bool hostUnifiedMemory_;
1659 int maxComputeUnits_;
1660 size_t maxWorkGroupSize_;
1663 int deviceVersionMajor_;
1664 int deviceVersionMinor_;
1665 String driverVersion_;
1668 bool intelSubgroupsSupport_;
1670 std::set<std::string> extensions_set_;
1679 Device::Device(void* d)
1685 Device::Device(const Device& d)
1692 Device& Device::operator = (const Device& d)
1694 Impl* newp = (Impl*)d.p;
1709 void Device::set(void* d)
1716 CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1720 Device Device::fromHandle(void* d)
1726 void* Device::ptr() const
1728 return p ? p->handle : 0;
1731 String Device::name() const
1732 { return p ? p->name_ : String(); }
1734 String Device::extensions() const
1735 { return p ? String(p->extensions_) : String(); }
1737 bool Device::isExtensionSupported(const String& extensionName) const
1738 { return p ? p->isExtensionSupported(extensionName) : false; }
1740 String Device::version() const
1741 { return p ? p->version_ : String(); }
1743 String Device::vendorName() const
1744 { return p ? p->vendorName_ : String(); }
1746 int Device::vendorID() const
1747 { return p ? p->vendorID_ : 0; }
1749 String Device::OpenCL_C_Version() const
1750 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1752 String Device::OpenCLVersion() const
1753 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1755 int Device::deviceVersionMajor() const
1756 { return p ? p->deviceVersionMajor_ : 0; }
1758 int Device::deviceVersionMinor() const
1759 { return p ? p->deviceVersionMinor_ : 0; }
1761 String Device::driverVersion() const
1762 { return p ? p->driverVersion_ : String(); }
1764 int Device::type() const
1765 { return p ? p->type_ : 0; }
1767 int Device::addressBits() const
1768 { return p ? p->addressBits_ : 0; }
1770 bool Device::available() const
1771 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1773 bool Device::compilerAvailable() const
1774 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1776 bool Device::linkerAvailable() const
1777 #ifdef CL_VERSION_1_2
1778 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1780 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1783 int Device::doubleFPConfig() const
1784 { return p ? p->doubleFPConfig_ : 0; }
1786 int Device::singleFPConfig() const
1787 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1789 int Device::halfFPConfig() const
1790 #ifdef CL_VERSION_1_2
1791 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1793 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1796 bool Device::endianLittle() const
1797 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1799 bool Device::errorCorrectionSupport() const
1800 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1802 int Device::executionCapabilities() const
1803 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1805 size_t Device::globalMemCacheSize() const
1806 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1808 int Device::globalMemCacheType() const
1809 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1811 int Device::globalMemCacheLineSize() const
1812 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1814 size_t Device::globalMemSize() const
1815 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1817 size_t Device::localMemSize() const
1818 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1820 int Device::localMemType() const
1821 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1823 bool Device::hostUnifiedMemory() const
1824 { return p ? p->hostUnifiedMemory_ : false; }
1826 bool Device::imageSupport() const
1827 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1829 bool Device::imageFromBufferSupport() const
1831 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1834 uint Device::imagePitchAlignment() const
1836 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1837 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1843 uint Device::imageBaseAddressAlignment() const
1845 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1846 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1852 size_t Device::image2DMaxWidth() const
1853 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1855 size_t Device::image2DMaxHeight() const
1856 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1858 size_t Device::image3DMaxWidth() const
1859 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1861 size_t Device::image3DMaxHeight() const
1862 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1864 size_t Device::image3DMaxDepth() const
1865 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1867 size_t Device::imageMaxBufferSize() const
1868 #ifdef CL_VERSION_1_2
1869 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1871 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1874 size_t Device::imageMaxArraySize() const
1875 #ifdef CL_VERSION_1_2
1876 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1878 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1881 bool Device::intelSubgroupsSupport() const
1882 { return p ? p->intelSubgroupsSupport_ : false; }
1884 int Device::maxClockFrequency() const
1885 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1887 int Device::maxComputeUnits() const
1888 { return p ? p->maxComputeUnits_ : 0; }
1890 int Device::maxConstantArgs() const
1891 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1893 size_t Device::maxConstantBufferSize() const
1894 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1896 size_t Device::maxMemAllocSize() const
1897 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1899 size_t Device::maxParameterSize() const
1900 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1902 int Device::maxReadImageArgs() const
1903 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1905 int Device::maxWriteImageArgs() const
1906 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1908 int Device::maxSamplers() const
1909 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1911 size_t Device::maxWorkGroupSize() const
1912 { return p ? p->maxWorkGroupSize_ : 0; }
1914 int Device::maxWorkItemDims() const
1915 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1917 void Device::maxWorkItemSizes(size_t* sizes) const
1921 const int MAX_DIMS = 32;
1923 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1924 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1928 int Device::memBaseAddrAlign() const
1929 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1931 int Device::nativeVectorWidthChar() const
1932 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1934 int Device::nativeVectorWidthShort() const
1935 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1937 int Device::nativeVectorWidthInt() const
1938 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1940 int Device::nativeVectorWidthLong() const
1941 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1943 int Device::nativeVectorWidthFloat() const
1944 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1946 int Device::nativeVectorWidthDouble() const
1947 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1949 int Device::nativeVectorWidthHalf() const
1950 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1952 int Device::preferredVectorWidthChar() const
1953 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1955 int Device::preferredVectorWidthShort() const
1956 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1958 int Device::preferredVectorWidthInt() const
1959 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1961 int Device::preferredVectorWidthLong() const
1962 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1964 int Device::preferredVectorWidthFloat() const
1965 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1967 int Device::preferredVectorWidthDouble() const
1968 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1970 int Device::preferredVectorWidthHalf() const
1971 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1973 size_t Device::printfBufferSize() const
1974 #ifdef CL_VERSION_1_2
1975 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1977 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1981 size_t Device::profilingTimerResolution() const
1982 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1984 const Device& Device::getDefault()
1986 auto& c = OpenCLExecutionContext::getCurrent();
1989 return c.getDevice();
1992 static Device dummy;
1996 ////////////////////////////////////// Context ///////////////////////////////////////////////////
1998 template <typename Functor, typename ObjectType>
1999 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2002 cl_int err = f(obj, name, 0, NULL, &required);
2003 if (err != CL_SUCCESS)
2009 AutoBuffer<char> buf(required + 1);
2010 char* ptr = buf.data(); // cleanup is not needed
2011 err = f(obj, name, required, ptr, NULL);
2012 if (err != CL_SUCCESS)
2020 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2025 std::istringstream ss(s);
2029 std::getline(ss, item, delim);
2030 elems.push_back(item);
2034 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2036 // Sample: AMD:GPU:Tahiti
2037 // Sample: :GPU|CPU: = '' = ':' = '::'
2038 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2039 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2041 std::vector<std::string> parts;
2042 split(configurationStr, ':', parts);
2043 if (parts.size() > 3)
2045 CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2048 if (parts.size() > 2)
2049 deviceNameOrID = parts[2];
2050 if (parts.size() > 1)
2052 split(parts[1], '|', deviceTypes);
2054 if (parts.size() > 0)
2056 platform = parts[0];
2061 #if defined WINRT || defined _WIN32_WCE
2062 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2064 CV_UNUSED(configuration)
2068 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2070 std::string platform, deviceName;
2071 std::vector<std::string> deviceTypes;
2074 configuration = getenv("OPENCV_OPENCL_DEVICE");
2076 if (configuration &&
2077 (strcmp(configuration, "disabled") == 0 ||
2078 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2084 if (deviceName.length() == 1)
2085 // We limit ID range to 0..9, because we want to write:
2086 // - '2500' to mean i5-2500
2087 // - '8350' to mean AMD FX-8350
2088 // - '650' to mean GeForce 650
2089 // To extend ID range change condition to '> 0'
2092 for (size_t i = 0; i < deviceName.length(); i++)
2094 if (!isdigit(deviceName[i]))
2102 deviceID = atoi(deviceName.c_str());
2108 std::vector<cl_platform_id> platforms;
2110 cl_uint numPlatforms = 0;
2111 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2113 if (numPlatforms == 0)
2115 platforms.resize((size_t)numPlatforms);
2116 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2117 platforms.resize(numPlatforms);
2120 int selectedPlatform = -1;
2121 if (platform.length() > 0)
2123 for (size_t i = 0; i < platforms.size(); i++)
2126 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
2127 if (name.find(platform) != std::string::npos)
2129 selectedPlatform = (int)i;
2133 if (selectedPlatform == -1)
2135 CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2139 if (deviceTypes.size() == 0)
2143 deviceTypes.push_back("GPU");
2145 deviceTypes.push_back("CPU");
2148 deviceTypes.push_back("ALL");
2150 for (size_t t = 0; t < deviceTypes.size(); t++)
2153 std::string tempStrDeviceType = deviceTypes[t];
2154 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), details::char_tolower);
2156 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2157 deviceType = Device::TYPE_GPU;
2158 else if (tempStrDeviceType == "cpu")
2159 deviceType = Device::TYPE_CPU;
2160 else if (tempStrDeviceType == "accelerator")
2161 deviceType = Device::TYPE_ACCELERATOR;
2162 else if (tempStrDeviceType == "all")
2163 deviceType = Device::TYPE_ALL;
2166 CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2170 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2171 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2172 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2176 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2177 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2179 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2183 size_t base = devices.size();
2184 devices.resize(base + count);
2185 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2186 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2188 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2192 for (size_t i = (isID ? deviceID : 0);
2193 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2197 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2198 cl_bool useGPU = true;
2199 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2201 cl_bool isIGPU = CL_FALSE;
2202 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2203 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2205 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2207 // TODO check for OpenCL 1.1
2215 return NULL; // suppress messages on stderr
2217 std::ostringstream msg;
2218 msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2219 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2220 << " Device types:";
2221 for (size_t t = 0; t < deviceTypes.size(); t++)
2222 msg << ' ' << deviceTypes[t];
2224 msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2226 CV_LOG_ERROR(NULL, msg.str());
2231 #ifdef HAVE_OPENCL_SVM
2234 enum AllocatorFlags { // don't use first 16 bits
2235 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
2236 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
2237 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
2238 OPENCL_SVM_BUFFER_MASK = 3 << 16,
2239 OPENCL_SVM_BUFFER_MAP = 4 << 16
2242 static bool checkForceSVMUmatUsage()
2244 static bool initialized = false;
2245 static bool force = false;
2248 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2253 static bool checkDisableSVMUMatUsage()
2255 static bool initialized = false;
2256 static bool force = false;
2259 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2264 static bool checkDisableSVM()
2266 static bool initialized = false;
2267 static bool force = false;
2270 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2275 // see SVMCapabilities
2276 static unsigned int getSVMCapabilitiesMask()
2278 static bool initialized = false;
2279 static unsigned int mask = 0;
2282 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2283 if (envValue == NULL)
2285 return ~0U; // all bits 1
2287 mask = atoi(envValue);
2295 static size_t getProgramCountLimit()
2297 static bool initialized = false;
2298 static size_t count = 0;
2301 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
2307 static int g_contextId = 0;
2309 class OpenCLBufferPoolImpl;
2310 class OpenCLSVMBufferPoolImpl;
2312 struct Context::Impl
2314 static Context::Impl* get(Context& context) { return context.p; }
2316 typedef std::deque<Context::Impl*> container_t;
2317 static container_t& getGlobalContainer()
2319 // never delete this container (Impl lifetime is greater due to TLS storage)
2320 static container_t* g_contexts = new container_t();
2325 Impl(const std::string& configuration_)
2327 , contextId(CV_XADD(&g_contextId, 1))
2328 , configuration(configuration_)
2330 #ifdef HAVE_OPENCL_SVM
2331 , svmInitialized(false)
2335 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2337 cv::AutoLock lock(cv::getInitializationMutex());
2338 auto& container = getGlobalContainer();
2339 container.resize(std::max(container.size(), (size_t)contextId + 1));
2340 container[contextId] = this;
2346 if (!cv::__termination)
2351 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2358 cv::AutoLock lock(cv::getInitializationMutex());
2359 auto& container = getGlobalContainer();
2360 CV_CheckLT((size_t)contextId, container.size(), "");
2361 container[contextId] = NULL;
2365 void init_device_list()
2369 cl_uint ndevices = 0;
2370 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2371 CV_Assert(ndevices > 0);
2373 cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2374 size_t devices_ret_size = 0;
2375 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2376 CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2379 for (unsigned i = 0; i < ndevices; i++)
2381 devices.emplace_back(Device::fromHandle(cl_devices[i]));
2385 void __init_buffer_pools(); // w/o synchronization
2386 void _init_buffer_pools() const
2390 cv::AutoLock lock(cv::getInitializationMutex());
2393 const_cast<Impl*>(this)->__init_buffer_pools();
2398 static Impl* findContext(const std::string& configuration)
2400 CV_TRACE_FUNCTION();
2401 cv::AutoLock lock(cv::getInitializationMutex());
2402 auto& container = getGlobalContainer();
2403 if (configuration.empty() && !container.empty())
2404 return container[0];
2405 for (auto it = container.begin(); it != container.end(); ++it)
2408 if (i && i->configuration == configuration)
2416 static Impl* findOrCreateContext(const std::string& configuration_)
2418 CV_TRACE_FUNCTION();
2419 std::string configuration = configuration_;
2420 if (configuration_.empty())
2422 const char* c = getenv("OPENCV_OPENCL_DEVICE");
2426 Impl* impl = findContext(configuration);
2429 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2433 cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
2437 impl = new Impl(configuration);
2440 impl->createFromDevice(d);
2453 static Impl* findOrCreateContext(cl_context h)
2455 CV_TRACE_FUNCTION();
2459 std::string configuration = cv::format("@ctx-%p", (void*)h);
2460 Impl* impl = findContext(configuration);
2463 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2468 impl = new Impl(configuration);
2471 CV_OCL_CHECK(clRetainContext(h));
2473 impl->init_device_list();
2483 static Impl* findOrCreateContext(const ocl::Device& device)
2485 CV_TRACE_FUNCTION();
2487 CV_Assert(!device.empty());
2488 cl_device_id d = (cl_device_id)device.ptr();
2491 std::string configuration = cv::format("@dev-%p", (void*)d);
2492 Impl* impl = findContext(configuration);
2495 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2500 impl = new Impl(configuration);
2503 impl->createFromDevice(d);
2504 CV_Assert(impl->handle);
2516 CV_TRACE_FUNCTION();
2517 cl_device_id d = selectOpenCLDevice();
2522 createFromDevice(d);
2525 void createFromDevice(cl_device_id d)
2527 CV_TRACE_FUNCTION();
2528 CV_Assert(handle == NULL);
2530 cl_platform_id pl = NULL;
2531 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2533 cl_context_properties prop[] =
2535 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2539 // !!! in the current implementation force the number of devices to 1 !!!
2543 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2544 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2546 bool ok = handle != 0 && status == CL_SUCCESS;
2556 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2558 void unloadProg(Program& prog)
2560 cv::AutoLock lock(program_cache_mutex);
2561 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2563 phash_t::iterator it = phash.find(*i);
2564 if (it != phash.end())
2566 if (it->second.ptr() == prog.ptr())
2576 std::string& getPrefixString()
2580 cv::AutoLock lock(program_cache_mutex);
2583 CV_Assert(!devices.empty());
2584 const Device& d = devices[0];
2585 int bits = d.addressBits();
2586 if (bits > 0 && bits != 64)
2587 prefix = cv::format("%d-bit--", bits);
2588 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2590 for (size_t i = 0; i < prefix.size(); i++)
2593 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2603 std::string& getPrefixBase()
2605 if (prefix_base.empty())
2607 cv::AutoLock lock(program_cache_mutex);
2608 if (prefix_base.empty())
2610 const Device& d = devices[0];
2611 int bits = d.addressBits();
2612 if (bits > 0 && bits != 64)
2613 prefix_base = cv::format("%d-bit--", bits);
2614 prefix_base += d.vendorName() + "--" + d.name() + "--";
2616 for (size_t i = 0; i < prefix_base.size(); i++)
2618 char c = prefix_base[i];
2619 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2621 prefix_base[i] = '_';
2629 IMPLEMENT_REFCOUNTABLE();
2631 const int contextId; // global unique ID
2632 const std::string configuration;
2635 std::vector<Device> devices;
2638 std::string prefix_base;
2640 cv::Mutex program_cache_mutex;
2641 typedef std::map<std::string, Program> phash_t;
2643 typedef std::list<cv::String> CacheList;
2644 CacheList cacheList;
2646 std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
2647 std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
2648 OpenCLBufferPoolImpl& getBufferPool() const
2650 _init_buffer_pools();
2651 CV_DbgAssert(bufferPool_);
2652 return *bufferPool_.get();
2654 OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2656 _init_buffer_pools();
2657 CV_DbgAssert(bufferPoolHostPtr_);
2658 return *bufferPoolHostPtr_.get();
2661 #ifdef HAVE_OPENCL_SVM
2662 bool svmInitialized;
2665 svm::SVMCapabilities svmCapabilities;
2666 svm::SVMFunctions svmFunctions;
2670 CV_Assert(handle != NULL);
2671 const Device& device = devices[0];
2672 cl_device_svm_capabilities deviceCaps = 0;
2673 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2674 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2675 if (status != CL_SUCCESS)
2677 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2680 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2681 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2682 svmCapabilities.value_ =
2683 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2684 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2685 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2686 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2687 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2688 if (svmCapabilities.value_ == 0)
2690 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2696 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2697 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2700 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2701 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2706 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2707 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2709 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2710 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2715 ((int*)ptr)[0] = 100;
2719 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2722 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2724 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2725 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2730 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2735 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2736 clSVMFree(handle, ptr);
2739 clSVMFree(handle, ptr);
2740 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2741 svmFunctions.fn_clSVMFree = clSVMFree;
2742 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2743 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2744 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2745 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2746 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2747 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2748 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2752 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2755 // Try HSA extension
2756 String extensions = device.extensions();
2757 if (extensions.find("cl_amd_svm") == String::npos)
2759 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2762 cl_platform_id p = NULL;
2763 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2764 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2765 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2766 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2767 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2768 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2769 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2770 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2771 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2772 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2773 CV_Assert(svmFunctions.isValid());
2777 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2782 svmAvailable = true;
2783 svmEnabled = !svm::checkDisableSVM();
2784 svmInitialized = true;
2785 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2788 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2789 svmAvailable = false;
2791 svmCapabilities.value_ = 0;
2792 svmInitialized = true;
2793 svmFunctions.fn_clSVMAlloc = NULL;
2797 std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2799 OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2801 _init_buffer_pools();
2802 CV_DbgAssert(bufferPoolSVM_);
2803 return *bufferPoolSVM_.get();
2807 friend class Program;
2822 Context::Context(int dtype)
2828 void Context::release()
2837 bool Context::create()
2842 p = Impl::findOrCreateContext(std::string());
2850 bool Context::create(int dtype)
2855 if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2857 p = Impl::findOrCreateContext("");
2859 else if (dtype == CL_DEVICE_TYPE_GPU)
2861 p = Impl::findOrCreateContext(":GPU:");
2863 else if (dtype == CL_DEVICE_TYPE_CPU)
2865 p = Impl::findOrCreateContext(":CPU:");
2869 CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2871 if (p && !p->handle)
2878 Context::Context(const Context& c)
2885 Context& Context::operator = (const Context& c)
2887 Impl* newp = (Impl*)c.p;
2896 void* Context::ptr() const
2898 return p == NULL ? NULL : p->handle;
2901 size_t Context::ndevices() const
2903 return p ? p->devices.size() : 0;
2906 Device& Context::device(size_t idx) const
2908 static Device dummy;
2909 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2912 Context& Context::getDefault(bool initialize)
2914 auto& c = OpenCLExecutionContext::getCurrent();
2917 auto& ctx = c.getContext();
2921 CV_UNUSED(initialize);
2922 static Context dummy;
2926 Program Context::getProg(const ProgramSource& prog,
2927 const String& buildopts, String& errmsg)
2929 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2932 void Context::unloadProg(Program& prog)
2935 p->unloadProg(prog);
2939 Context Context::fromHandle(void* context)
2942 ctx.p = Impl::findOrCreateContext((cl_context)context);
2947 Context Context::fromDevice(const ocl::Device& device)
2950 ctx.p = Impl::findOrCreateContext(device);
2955 Context Context::create(const std::string& configuration)
2958 ctx.p = Impl::findOrCreateContext(configuration);
2962 #ifdef HAVE_OPENCL_SVM
2963 bool Context::useSVM() const
2965 Context::Impl* i = p;
2967 if (!i->svmInitialized)
2969 return i->svmEnabled;
2971 void Context::setUseSVM(bool enabled)
2973 Context::Impl* i = p;
2975 if (!i->svmInitialized)
2977 if (enabled && !i->svmAvailable)
2979 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
2981 i->svmEnabled = enabled;
2984 bool Context::useSVM() const { return false; }
2985 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
2988 #ifdef HAVE_OPENCL_SVM
2991 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
2993 Context::Impl* i = context.p;
2995 if (!i->svmInitialized)
2997 return i->svmCapabilities;
3000 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
3002 Context::Impl* i = context.p;
3004 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
3005 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
3006 return &i->svmFunctions;
3009 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3011 if (checkForceSVMUmatUsage())
3013 if (checkDisableSVMUMatUsage())
3015 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3017 return false; // don't use SVM by default
3020 } // namespace cv::ocl::svm
3021 #endif // HAVE_OPENCL_SVM
3024 static void get_platform_name(cl_platform_id id, String& name)
3026 // get platform name string length
3028 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
3030 // get platform name string
3031 AutoBuffer<char> buf(sz + 1);
3032 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
3034 // just in case, ensure trailing zero for ASCIIZ string
3041 // Attaches OpenCL context to OpenCV
3043 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3045 auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3050 OpenCLExecutionContext OpenCLExecutionContext::create(
3051 const std::string& platformName, void* platformID, void* context, void* deviceID
3055 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3058 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3061 CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3063 std::vector<cl_platform_id> platforms(cnt);
3065 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3067 bool platformAvailable = false;
3069 // check if external platformName contained in list of available platforms in OpenCV
3070 for (unsigned int i = 0; i < cnt; i++)
3072 String availablePlatformName;
3073 get_platform_name(platforms[i], availablePlatformName);
3074 // external platform is found in the list of available platforms
3075 if (platformName == availablePlatformName)
3077 platformAvailable = true;
3082 if (!platformAvailable)
3083 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3085 // check if platformID corresponds to platformName
3086 String actualPlatformName;
3087 get_platform_name((cl_platform_id)platformID, actualPlatformName);
3088 if (platformName != actualPlatformName)
3089 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3091 OpenCLExecutionContext ctx;
3092 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
3093 CV_OCL_CHECK(clReleaseContext((cl_context)context));
3094 CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3098 void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3100 // internal call, less checks
3101 cl_platform_id platformID = (cl_platform_id)_platform;
3102 cl_context context = (cl_context)_context;
3103 cl_device_id deviceID = (cl_device_id)_device;
3105 std::string platformName = PlatformInfo(platformID).name();
3107 auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3108 CV_Assert(!clExecCtx.empty());
3109 ctx = clExecCtx.getContext();
3112 /////////////////////////////////////////// Queue /////////////////////////////////////////////
3116 inline void __init()
3120 isProfilingQueue_ = false;
3123 Impl(cl_command_queue q)
3128 cl_command_queue_properties props = 0;
3129 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3130 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3133 Impl(cl_command_queue q, bool isProfilingQueue)
3137 isProfilingQueue_ = isProfilingQueue;
3140 Impl(const Context& c, const Device& d, bool withProfiling = false)
3144 const Context* pc = &c;
3145 cl_context ch = (cl_context)pc->ptr();
3148 pc = &Context::getDefault();
3149 ch = (cl_context)pc->ptr();
3151 cl_device_id dh = (cl_device_id)d.ptr();
3153 dh = (cl_device_id)pc->device(0).ptr();
3155 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3156 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3157 isProfilingQueue_ = withProfiling;
3163 if (!cv::__termination)
3168 CV_OCL_DBG_CHECK(clFinish(handle));
3169 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3175 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3177 if (isProfilingQueue_)
3180 if (profiling_queue_.ptr())
3181 return profiling_queue_;
3184 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3186 cl_device_id device = 0;
3187 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3189 cl_int result = CL_SUCCESS;
3190 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3191 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3192 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3195 queue.p = new Impl(q, true);
3196 profiling_queue_ = queue;
3198 return profiling_queue_;
3201 IMPLEMENT_REFCOUNTABLE();
3203 cl_command_queue handle;
3204 bool isProfilingQueue_;
3205 cv::ocl::Queue profiling_queue_;
3213 Queue::Queue(const Context& c, const Device& d)
3219 Queue::Queue(const Queue& q)
3226 Queue& Queue::operator = (const Queue& q)
3228 Impl* newp = (Impl*)q.p;
3243 bool Queue::create(const Context& c, const Device& d)
3248 return p->handle != 0;
3251 void Queue::finish()
3255 CV_OCL_DBG_CHECK(clFinish(p->handle));
3259 const Queue& Queue::getProfilingQueue() const
3262 return p->getProfilingQueue(*this);
3265 void* Queue::ptr() const
3267 return p ? p->handle : 0;
3270 Queue& Queue::getDefault()
3272 auto& c = OpenCLExecutionContext::getCurrent();
3275 auto& q = c.getQueue();
3282 static cl_command_queue getQueue(const Queue& q)
3284 cl_command_queue qq = (cl_command_queue)q.ptr();
3286 qq = (cl_command_queue)Queue::getDefault().ptr();
3290 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3292 KernelArg::KernelArg()
3293 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3297 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3298 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3300 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3303 KernelArg KernelArg::Constant(const Mat& m)
3305 CV_Assert(m.isContinuous());
3306 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3309 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
3313 Impl(const char* kname, const Program& prog) :
3314 refcount(1), handle(NULL), isInProgress(false), nu(0)
3316 cl_program ph = (cl_program)prog.ptr();
3321 handle = clCreateKernel(ph, kname, &retval);
3322 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3324 for( int i = 0; i < MAX_ARRS; i++ )
3326 haveTempDstUMats = false;
3327 haveTempSrcUMats = false;
3332 for( int i = 0; i < MAX_ARRS; i++ )
3335 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3337 u[i]->flags |= UMatData::ASYNC_CLEANUP;
3338 u[i]->currAllocator->deallocate(u[i]);
3343 haveTempDstUMats = false;
3344 haveTempSrcUMats = false;
3347 void addUMat(const UMat& m, bool dst)
3349 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3351 CV_XADD(&m.u->urefcount, 1);
3353 if(dst && m.u->tempUMat())
3354 haveTempDstUMats = true;
3355 if(m.u->originalUMatData == NULL && m.u->tempUMat())
3356 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3359 void addImage(const Image2D& image)
3361 images.push_back(image);
3364 void finit(cl_event e)
3369 isInProgress = false;
3373 bool run(int dims, size_t _globalsize[], size_t _localsize[],
3374 bool sync, int64* timeNS, const Queue& q);
3380 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3384 IMPLEMENT_REFCOUNTABLE();
3388 enum { MAX_ARRS = 16 };
3389 UMatData* u[MAX_ARRS];
3392 std::list<Image2D> images;
3393 bool haveTempDstUMats;
3394 bool haveTempSrcUMats;
3397 }} // namespace cv::ocl
3401 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3405 ((cv::ocl::Kernel::Impl*)p)->finit(e);
3407 catch (const cv::Exception& exc)
3409 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3411 catch (const std::exception& exc)
3413 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3417 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3423 namespace cv { namespace ocl {
3430 Kernel::Kernel(const char* kname, const Program& prog)
3433 create(kname, prog);
3436 Kernel::Kernel(const char* kname, const ProgramSource& src,
3437 const String& buildopts, String* errmsg)
3440 create(kname, src, buildopts, errmsg);
3443 Kernel::Kernel(const Kernel& k)
3450 Kernel& Kernel::operator = (const Kernel& k)
3452 Impl* newp = (Impl*)k.p;
3467 bool Kernel::create(const char* kname, const Program& prog)
3471 p = new Impl(kname, prog);
3477 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3483 bool Kernel::create(const char* kname, const ProgramSource& src,
3484 const String& buildopts, String* errmsg)
3492 if( !errmsg ) errmsg = &tempmsg;
3493 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
3494 return create(kname, prog);
3497 void* Kernel::ptr() const
3499 return p ? p->handle : 0;
3502 bool Kernel::empty() const
3507 int Kernel::set(int i, const void* value, size_t sz)
3509 if (!p || !p->handle)
3516 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3517 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str());
3518 if (retval != CL_SUCCESS)
3523 int Kernel::set(int i, const Image2D& image2D)
3525 p->addImage(image2D);
3526 cl_mem h = (cl_mem)image2D.ptr();
3527 return set(i, &h, sizeof(h));
3530 int Kernel::set(int i, const UMat& m)
3532 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3535 int Kernel::set(int i, const KernelArg& arg)
3537 if( !p || !p->handle )
3541 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
3542 p->name.c_str(), (int)i));
3550 AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
3551 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
3552 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3553 if (ptronly && arg.m->empty())
3555 cl_mem h_null = (cl_mem)NULL;
3556 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3557 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3560 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3564 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3565 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3571 #ifdef HAVE_OPENCL_SVM
3572 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3574 const Context& ctx = Context::getDefault();
3575 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3576 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3577 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3579 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3581 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3583 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3588 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3589 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3596 else if( arg.m->dims <= 2 )
3599 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3600 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3601 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3602 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3605 if( !(arg.flags & KernelArg::NO_SIZE) )
3607 int cols = u2d.cols*arg.wscale/arg.iwscale;
3608 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3609 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3610 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3611 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3618 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3619 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3620 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3621 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3622 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3623 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3625 if( !(arg.flags & KernelArg::NO_SIZE) )
3627 int cols = u3d.cols*arg.wscale/arg.iwscale;
3628 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3629 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3630 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3631 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3632 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3633 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3637 p->addUMat(*arg.m, !!(accessFlags & ACCESS_WRITE));
3640 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3641 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3645 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3646 bool sync, const Queue& q)
3651 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3653 CV_Assert(_globalsize != NULL);
3654 for (int i = 0; i < dims; i++)
3656 size_t val = _localsize ? _localsize[i] :
3657 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3658 CV_Assert( val > 0 );
3659 total *= _globalsize[i];
3660 if (_globalsize[i] == 1 && !_localsize)
3662 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3664 CV_Assert(total > 0);
3666 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3670 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3671 bool sync, int64* timeNS, const Queue& q)
3673 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3675 if (!handle || isInProgress)
3678 cl_command_queue qq = getQueue(q);
3679 if (haveTempDstUMats)
3681 if (haveTempSrcUMats)
3685 cl_event asyncEvent = 0;
3686 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3687 NULL, globalsize, localsize, 0, 0,
3688 (sync && !timeNS) ? 0 : &asyncEvent);
3689 #if !CV_OPENCL_SHOW_RUN_KERNELS
3690 if (retval != CL_SUCCESS)
3693 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
3694 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3695 (localsize ? cv::format("%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3696 sync ? "true" : "false"
3698 if (retval != CL_SUCCESS)
3700 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3702 #if CV_OPENCL_TRACE_CHECK
3703 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3705 printf("%s\n", msg.c_str());
3709 if (sync || retval != CL_SUCCESS)
3711 CV_OCL_DBG_CHECK(clFinish(qq));
3714 if (retval == CL_SUCCESS)
3716 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3717 cl_ulong startTime, stopTime;
3718 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3719 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3720 *timeNS = (int64)(stopTime - startTime);
3732 isInProgress = true;
3733 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3736 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3737 return retval == CL_SUCCESS;
3740 bool Kernel::runTask(bool sync, const Queue& q)
3742 if(!p || !p->handle || p->isInProgress)
3745 cl_command_queue qq = getQueue(q);
3746 cl_event asyncEvent = 0;
3747 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3748 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3749 if (sync || retval != CL_SUCCESS)
3751 CV_OCL_DBG_CHECK(clFinish(qq));
3757 p->isInProgress = true;
3758 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3761 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3762 return retval == CL_SUCCESS;
3765 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3767 CV_Assert(p && p->handle && !p->isInProgress);
3768 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3770 q.finish(); // call clFinish() on base queue
3771 Queue profilingQueue = q.getProfilingQueue();
3773 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3774 return res ? timeNs : -1;
3777 size_t Kernel::workGroupSize() const
3779 if(!p || !p->handle)
3781 size_t val = 0, retsz = 0;
3782 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3783 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3784 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3785 return status == CL_SUCCESS ? val : 0;
3788 size_t Kernel::preferedWorkGroupSizeMultiple() const
3790 if(!p || !p->handle)
3792 size_t val = 0, retsz = 0;
3793 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3794 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3795 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3796 return status == CL_SUCCESS ? val : 0;
3799 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3801 if(!p || !p->handle || !wsz)
3804 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3805 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3806 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3807 return status == CL_SUCCESS;
3810 size_t Kernel::localMemSize() const
3812 if(!p || !p->handle)
3816 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3817 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3818 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3819 return status == CL_SUCCESS ? (size_t)val : 0;
3824 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3826 struct ProgramSource::Impl
3828 IMPLEMENT_REFCOUNTABLE();
3831 PROGRAM_SOURCE_CODE = 0,
3837 Impl(const String& src)
3839 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3840 initFromSource(src, cv::String());
3842 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3844 init(PROGRAM_SOURCE_CODE, module, name);
3845 initFromSource(codeStr, codeHash);
3849 void init(enum KIND kind, const String& module, const String& name)
3858 isHashUpdated = false;
3861 void initFromSource(const String& codeStr, const String& codeHash)
3864 sourceHash_ = codeHash;
3865 if (sourceHash_.empty())
3871 isHashUpdated = true;
3875 void updateHash(const char* hashStr = NULL)
3879 sourceHash_ = cv::String(hashStr);
3880 isHashUpdated = true;
3886 case PROGRAM_SOURCE_CODE:
3889 CV_Assert(codeStr_.empty());
3890 hash = crc64(sourceAddr_, sourceSize_); // static storage
3894 CV_Assert(!codeStr_.empty());
3895 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3898 case PROGRAM_BINARIES:
3901 hash = crc64(sourceAddr_, sourceSize_);
3904 CV_Error(Error::StsInternal, "Internal error");
3906 sourceHash_ = cv::format("%08jx", (uintmax_t)hash);
3907 isHashUpdated = true;
3910 Impl(enum KIND kind,
3911 const String& module, const String& name,
3912 const unsigned char* binary, const size_t size,
3913 const cv::String& buildOptions = cv::String())
3915 init(kind, module, name);
3917 sourceAddr_ = binary;
3920 buildOptions_ = buildOptions;
3923 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3924 const char* sourceCodeStaticStr, const char* hashStaticStr,
3925 const cv::String& buildOptions)
3927 ProgramSource result;
3928 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3929 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3930 result.p->updateHash(hashStaticStr);
3934 static ProgramSource fromBinary(const String& module, const String& name,
3935 const unsigned char* binary, const size_t size,
3936 const cv::String& buildOptions)
3938 ProgramSource result;
3939 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
3943 static ProgramSource fromSPIR(const String& module, const String& name,
3944 const unsigned char* binary, const size_t size,
3945 const cv::String& buildOptions)
3947 ProgramSource result;
3948 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
3955 // TODO std::vector<ProgramSource> includes_;
3956 String codeStr_; // PROGRAM_SOURCE_CODE only
3958 const unsigned char* sourceAddr_;
3961 cv::String buildOptions_;
3966 friend struct Program::Impl;
3967 friend struct internal::ProgramEntry;
3968 friend struct Context::Impl;
3972 ProgramSource::ProgramSource()
3977 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
3979 p = new Impl(module, name, codeStr, codeHash);
3982 ProgramSource::ProgramSource(const char* prog)
3987 ProgramSource::ProgramSource(const String& prog)
3992 ProgramSource::~ProgramSource()
3998 ProgramSource::ProgramSource(const ProgramSource& prog)
4005 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
4007 Impl* newp = (Impl*)prog.p;
4016 const String& ProgramSource::source() const
4019 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
4020 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
4024 ProgramSource::hash_t ProgramSource::hash() const
4026 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
4029 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
4030 const unsigned char* binary, const size_t size,
4031 const cv::String& buildOptions)
4034 CV_Assert(size > 0);
4035 return Impl::fromBinary(module, name, binary, size, buildOptions);
4038 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4039 const unsigned char* binary, const size_t size,
4040 const cv::String& buildOptions)
4043 CV_Assert(size > 0);
4044 return Impl::fromBinary(module, name, binary, size, buildOptions);
4048 internal::ProgramEntry::operator ProgramSource&() const
4050 if (this->pProgramSource == NULL)
4052 cv::AutoLock lock(cv::getInitializationMutex());
4053 if (this->pProgramSource == NULL)
4055 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
4056 ProgramSource* ptr = new ProgramSource(ps);
4057 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
4060 return *this->pProgramSource;
4065 /////////////////////////////////////////// Program /////////////////////////////////////////////
4068 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4076 return a + (cv::String(" ") + b);
4079 struct Program::Impl
4081 IMPLEMENT_REFCOUNTABLE();
4083 Impl(const ProgramSource& src,
4084 const String& _buildflags, String& errmsg) :
4087 buildflags(_buildflags)
4089 const ProgramSource::Impl* src_ = src.getImpl();
4091 sourceModule_ = src_->module_;
4092 sourceName_ = src_->name_;
4093 const Context ctx = Context::getDefault();
4094 Device device = ctx.device(0);
4095 if (ctx.ptr() == NULL || device.ptr() == NULL)
4097 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
4098 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4101 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
4102 else if (device.isIntel())
4103 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
4104 const String param_buildExtraOptions = getBuildExtraOptions();
4105 if (!param_buildExtraOptions.empty())
4106 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
4108 compile(ctx, src_, errmsg);
4111 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4113 CV_Assert(ctx.getImpl());
4116 // We don't cache OpenCL binaries
4117 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4119 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4120 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4123 return compileWithCache(ctx, src_, errmsg);
4126 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4128 CV_Assert(ctx.getImpl());
4130 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
4132 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4133 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
4134 const std::string base_dir = config.prepareCacheDirectoryForContext(
4135 ctx.getImpl()->getPrefixString(),
4136 ctx.getImpl()->getPrefixBase()
4138 const String& hash_str = src_->sourceHash_;
4140 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
4142 CV_Assert(!hash_str.empty());
4143 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
4144 fname = utils::fs::join(base_dir, fname);
4146 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
4147 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
4151 std::vector<char> binaryBuf;
4154 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4155 BinaryProgramFile file(fname, hash_str.c_str());
4156 res = file.read(buildflags, binaryBuf);
4160 CV_Assert(!binaryBuf.empty());
4161 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
4162 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
4167 catch (const cv::Exception& e)
4170 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
4174 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
4177 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4178 CV_Assert(handle == NULL);
4179 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4181 if (!buildFromSources(ctx, src_, errmsg))
4186 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
4188 buildflags = joinBuildOptions(buildflags, " -x spir");
4189 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
4191 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
4193 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4194 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4198 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
4200 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
4204 CV_Error(Error::StsInternal, "Internal error");
4206 CV_Assert(handle != NULL);
4207 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4208 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
4212 std::vector<char> binaryBuf;
4213 getProgramBinary(binaryBuf);
4215 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4216 BinaryProgramFile file(fname, hash_str.c_str());
4217 file.write(buildflags, binaryBuf);
4220 catch (const cv::Exception& e)
4222 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
4226 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
4229 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4230 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4231 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4233 std::vector<char> binaryBuf;
4234 getProgramBinary(binaryBuf);
4235 if (!binaryBuf.empty())
4237 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4239 createFromBinary(ctx, binaryBuf, errmsg);
4243 return handle != NULL;
4246 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4248 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4251 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4252 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4253 if (log_retval == CL_SUCCESS && retsz > 1)
4255 buffer.resize(retsz + 16);
4256 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4257 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4258 if (log_retval == CL_SUCCESS)
4260 if (retsz < buffer.size())
4263 buffer[buffer.size() - 1] = 0;
4271 errmsg = String(buffer.data());
4272 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4273 sourceModule_.c_str(), sourceName_.c_str(),
4274 result, getOpenCLErrorString(result),
4275 buildflags.c_str(), errmsg.c_str());
4279 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4282 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
4283 CV_Assert(handle == NULL);
4284 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
4285 sourceModule_.c_str(), sourceName_.c_str(),
4286 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
4288 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
4290 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
4291 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
4292 CV_Assert(srcptr != NULL);
4293 CV_Assert(srclen > 0);
4297 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
4298 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
4299 CV_Assert(handle || retval != CL_SUCCESS);
4300 if (handle && retval == CL_SUCCESS)
4302 size_t n = ctx.ndevices();
4303 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
4304 cl_device_id* deviceList = deviceListBuf.data();
4305 for (size_t i = 0; i < n; i++)
4307 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
4310 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
4311 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
4312 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
4313 if (retval != CL_SUCCESS)
4316 dumpBuildLog_(retval, deviceList, errmsg);
4318 // don't remove "retval != CL_SUCCESS" condition here:
4319 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
4320 if (retval != CL_SUCCESS && handle)
4322 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4326 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4327 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4329 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
4331 char kernels_buffer[4096] = {0};
4332 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4333 if (retsz < sizeof(kernels_buffer))
4334 kernels_buffer[retsz] = 0;
4336 kernels_buffer[0] = 0;
4337 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4342 return handle != NULL;
4345 void getProgramBinary(std::vector<char>& buf)
4349 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4351 uchar* ptr = (uchar*)&buf[0];
4352 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4355 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4357 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
4360 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4362 CV_Assert(handle == NULL);
4363 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4364 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4366 CV_Assert(binarySize > 0);
4368 size_t ndevices = (int)ctx.ndevices();
4369 AutoBuffer<cl_device_id> devices_(ndevices);
4370 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4371 AutoBuffer<size_t> binarySizes_(ndevices);
4373 cl_device_id* devices = devices_.data();
4374 const uchar** binaryPtrs = binaryPtrs_.data();
4375 size_t* binarySizes = binarySizes_.data();
4376 for (size_t i = 0; i < ndevices; i++)
4378 devices[i] = (cl_device_id)ctx.device(i).ptr();
4379 binaryPtrs[i] = binaryAddr;
4380 binarySizes[i] = binarySize;
4384 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4385 binarySizes, binaryPtrs, NULL, &result);
4386 if (result != CL_SUCCESS)
4388 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4391 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4399 // call clBuildProgram()
4401 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4402 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4403 if (result != CL_SUCCESS)
4405 dumpBuildLog_(result, devices, errmsg);
4408 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4414 // check build status
4416 cl_build_status build_status = CL_BUILD_NONE;
4418 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4419 sizeof(build_status), &build_status, &retsz));
4420 if (result == CL_SUCCESS)
4422 if (build_status == CL_BUILD_SUCCESS)
4428 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4434 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4437 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4442 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4443 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4445 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4447 char kernels_buffer[4096] = {0};
4448 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4449 if (retsz < sizeof(kernels_buffer))
4450 kernels_buffer[retsz] = 0;
4452 kernels_buffer[0] = 0;
4453 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4456 return handle != NULL;
4464 if (!cv::__termination)
4467 clReleaseProgram(handle);
4476 String sourceModule_;
4481 Program::Program() { p = 0; }
4483 Program::Program(const ProgramSource& src,
4484 const String& buildflags, String& errmsg)
4487 create(src, buildflags, errmsg);
4490 Program::Program(const Program& prog)
4497 Program& Program::operator = (const Program& prog)
4499 Impl* newp = (Impl*)prog.p;
4514 bool Program::create(const ProgramSource& src,
4515 const String& buildflags, String& errmsg)
4522 p = new Impl(src, buildflags, errmsg);
4531 void* Program::ptr() const
4533 return p ? p->handle : 0;
4536 #ifndef OPENCV_REMOVE_DEPRECATED_API
4537 const ProgramSource& Program::source() const
4539 CV_Error(Error::StsNotImplemented, "Removed API");
4542 bool Program::read(const String& bin, const String& buildflags)
4544 CV_UNUSED(bin); CV_UNUSED(buildflags);
4545 CV_Error(Error::StsNotImplemented, "Removed API");
4548 bool Program::write(String& bin) const
4551 CV_Error(Error::StsNotImplemented, "Removed API");
4554 String Program::getPrefix() const
4558 Context::Impl* ctx_ = Context::getDefault().getImpl();
4560 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4563 String Program::getPrefix(const String& buildflags)
4565 Context::Impl* ctx_ = Context::getDefault().getImpl();
4567 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4569 #endif // OPENCV_REMOVE_DEPRECATED_API
4571 void Program::getBinary(std::vector<char>& binary) const
4573 CV_Assert(p && "Empty program");
4574 p->getProgramBinary(binary);
4577 Program Context::Impl::getProg(const ProgramSource& src,
4578 const String& buildflags, String& errmsg)
4580 size_t limit = getProgramCountLimit();
4581 const ProgramSource::Impl* src_ = src.getImpl();
4583 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4584 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4585 getPrefixString().c_str(),
4586 buildflags.c_str());
4588 cv::AutoLock lock(program_cache_mutex);
4589 phash_t::iterator it = phash.find(key);
4590 if (it != phash.end())
4593 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4594 if (i != cacheList.end() && i != cacheList.begin())
4597 cacheList.push_front(key);
4601 { // cleanup program cache
4602 size_t sz = phash.size();
4603 if (limit > 0 && sz >= limit)
4605 static bool warningFlag = false;
4608 printf("\nWARNING: OpenCV-OpenCL:\n"
4609 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4610 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4613 while (!cacheList.empty())
4615 size_t c = phash.erase(cacheList.back());
4616 cacheList.pop_back();
4623 Program prog(src, buildflags, errmsg);
4624 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4626 cv::AutoLock lock(program_cache_mutex);
4627 phash.insert(std::pair<std::string, Program>(key, prog));
4628 cacheList.push_front(key);
4634 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4636 template<typename T>
4637 class OpenCLBufferPool
4640 ~OpenCLBufferPool() { }
4642 virtual T allocate(size_t size) = 0;
4643 virtual void release(T buffer) = 0;
4646 template <typename Derived, typename BufferEntry, typename T>
4647 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4650 inline Derived& derived() { return *static_cast<Derived*>(this); }
4654 size_t currentReservedSize;
4655 size_t maxReservedSize;
4657 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4658 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4661 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4663 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4664 for (; i != allocatedEntries_.end(); ++i)
4666 BufferEntry& e = *i;
4667 if (e.clBuffer_ == buffer)
4670 allocatedEntries_.erase(i);
4678 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4680 if (reservedEntries_.empty())
4682 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4683 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4685 size_t minDiff = (size_t)(-1);
4686 for (; i != reservedEntries_.end(); ++i)
4688 BufferEntry& e = *i;
4689 if (e.capacity_ >= size)
4691 size_t diff = e.capacity_ - size;
4692 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4702 if (result_pos != reservedEntries_.end())
4704 //CV_DbgAssert(result == *result_pos);
4705 reservedEntries_.erase(result_pos);
4707 currentReservedSize -= entry.capacity_;
4708 allocatedEntries_.push_back(entry);
4715 void _checkSizeOfReservedEntries()
4717 while (currentReservedSize > maxReservedSize)
4719 CV_DbgAssert(!reservedEntries_.empty());
4720 const BufferEntry& entry = reservedEntries_.back();
4721 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4722 currentReservedSize -= entry.capacity_;
4723 derived()._releaseBufferEntry(entry);
4724 reservedEntries_.pop_back();
4728 inline size_t _allocationGranularity(size_t size)
4731 if (size < 1024*1024)
4732 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4733 else if (size < 16*1024*1024)
4740 OpenCLBufferPoolBaseImpl()
4741 : currentReservedSize(0),
4746 virtual ~OpenCLBufferPoolBaseImpl()
4748 freeAllReservedBuffers();
4749 CV_Assert(reservedEntries_.empty());
4752 virtual T allocate(size_t size) CV_OVERRIDE
4754 AutoLock locker(mutex_);
4756 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4758 CV_DbgAssert(size <= entry.capacity_);
4759 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4763 derived()._allocateBufferEntry(entry, size);
4765 return entry.clBuffer_;
4767 virtual void release(T buffer) CV_OVERRIDE
4769 AutoLock locker(mutex_);
4771 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4772 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4774 derived()._releaseBufferEntry(entry);
4778 reservedEntries_.push_front(entry);
4779 currentReservedSize += entry.capacity_;
4780 _checkSizeOfReservedEntries();
4784 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4785 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4786 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4788 AutoLock locker(mutex_);
4789 size_t oldMaxReservedSize = maxReservedSize;
4790 maxReservedSize = size;
4791 if (maxReservedSize < oldMaxReservedSize)
4793 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4794 for (; i != reservedEntries_.end();)
4796 const BufferEntry& entry = *i;
4797 if (entry.capacity_ > maxReservedSize / 8)
4799 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4800 currentReservedSize -= entry.capacity_;
4801 derived()._releaseBufferEntry(entry);
4802 i = reservedEntries_.erase(i);
4807 _checkSizeOfReservedEntries();
4810 virtual void freeAllReservedBuffers() CV_OVERRIDE
4812 AutoLock locker(mutex_);
4813 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4814 for (; i != reservedEntries_.end(); ++i)
4816 const BufferEntry& entry = *i;
4817 derived()._releaseBufferEntry(entry);
4819 reservedEntries_.clear();
4820 currentReservedSize = 0;
4824 struct CLBufferEntry
4828 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4831 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4834 typedef struct CLBufferEntry BufferEntry;
4838 OpenCLBufferPoolImpl(int createFlags = 0)
4839 : createFlags_(createFlags)
4843 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4845 CV_DbgAssert(entry.clBuffer_ == NULL);
4846 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4847 Context& ctx = Context::getDefault();
4848 cl_int retval = CL_SUCCESS;
4849 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4850 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4851 CV_Assert(entry.clBuffer_ != NULL);
4852 if(retval == CL_SUCCESS)
4854 CV_IMPL_ADD(CV_IMPL_OCL);
4856 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4857 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4858 allocatedEntries_.push_back(entry);
4861 void _releaseBufferEntry(const BufferEntry& entry)
4863 CV_Assert(entry.capacity_ != 0);
4864 CV_Assert(entry.clBuffer_ != NULL);
4865 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4866 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4867 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4871 #ifdef HAVE_OPENCL_SVM
4872 struct CLSVMBufferEntry
4876 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4878 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4881 typedef struct CLSVMBufferEntry BufferEntry;
4883 OpenCLSVMBufferPoolImpl()
4887 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4889 CV_DbgAssert(entry.clBuffer_ == NULL);
4890 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4892 Context& ctx = Context::getDefault();
4893 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4894 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4895 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4896 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4898 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4899 CV_DbgAssert(svmFns->isValid());
4901 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4902 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4905 entry.clBuffer_ = buf;
4907 CV_IMPL_ADD(CV_IMPL_OCL);
4909 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4910 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4911 allocatedEntries_.push_back(entry);
4914 void _releaseBufferEntry(const BufferEntry& entry)
4916 CV_Assert(entry.capacity_ != 0);
4917 CV_Assert(entry.clBuffer_ != NULL);
4918 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4919 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4920 Context& ctx = Context::getDefault();
4921 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4922 CV_DbgAssert(svmFns->isValid());
4923 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4924 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4931 template <bool readAccess, bool writeAccess>
4932 class AlignedDataPtr
4936 uchar* const originPtr_;
4937 const size_t alignment_;
4939 uchar* allocatedPtr_;
4942 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
4943 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
4945 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4946 CV_DbgAssert(!readAccess || ptr);
4947 if (((size_t)ptr_ & (alignment - 1)) != 0)
4949 allocatedPtr_ = new uchar[size_ + alignment - 1];
4950 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4953 memcpy(ptr_, originPtr_, size_);
4958 uchar* getAlignedPtr() const
4960 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4970 memcpy(originPtr_, ptr_, size_);
4972 delete[] allocatedPtr_;
4973 allocatedPtr_ = NULL;
4978 AlignedDataPtr(const AlignedDataPtr&); // disabled
4979 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
4982 template <bool readAccess, bool writeAccess>
4983 class AlignedDataPtr2D
4987 uchar* const originPtr_;
4988 const size_t alignment_;
4990 uchar* allocatedPtr_;
4996 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
4997 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
4999 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5000 CV_DbgAssert(!readAccess || ptr != NULL);
5001 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5003 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5004 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5007 for (size_t i = 0; i < rows_; i++)
5008 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
5013 uchar* getAlignedPtr() const
5015 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5025 for (size_t i = 0; i < rows_; i++)
5026 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
5028 delete[] allocatedPtr_;
5029 allocatedPtr_ = NULL;
5034 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5035 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5038 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
5039 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
5043 void Context::Impl::__init_buffer_pools()
5045 bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
5046 OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5047 bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5048 OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5050 size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5051 size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
5052 bufferPool.setMaxReservedSize(poolSize);
5053 size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
5054 bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5056 #ifdef HAVE_OPENCL_SVM
5057 bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5058 OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5059 size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5060 bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5063 CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5066 class OpenCLAllocator CV_FINAL : public MatAllocator
5071 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
5072 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
5073 #ifdef HAVE_OPENCL_SVM
5074 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
5076 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
5081 matStdAllocator = Mat::getDefaultAllocator();
5085 flushCleanupQueue();
5088 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5089 AccessFlag flags, UMatUsageFlags usageFlags) const
5091 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5095 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5097 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
5100 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5102 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
5106 void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5108 const Device& dev = ctx.device(0);
5110 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5111 createFlags |= CL_MEM_ALLOC_HOST_PTR;
5113 if (!isOpenCLCopyingForced() &&
5114 (isOpenCLMapForced() ||
5115 (dev.hostUnifiedMemory()
5122 flags0 = static_cast<UMatData::MemoryFlag>(0);
5124 flags0 = UMatData::COPY_ON_MAP;
5127 UMatData* allocate(int dims, const int* sizes, int type,
5128 void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5131 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5133 flushCleanupQueue();
5135 CV_Assert(data == 0);
5136 size_t total = CV_ELEM_SIZE(type);
5137 for( int i = dims-1; i >= 0; i-- )
5144 Context& ctx = Context::getDefault();
5146 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5147 Context::Impl& ctxImpl = *ctx.getImpl();
5149 int createFlags = 0;
5150 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5151 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5153 void* handle = NULL;
5154 int allocatorFlags = 0;
5156 #ifdef HAVE_OPENCL_SVM
5157 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5158 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5160 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5161 handle = ctxImpl.getBufferPoolSVM().allocate(total);
5163 // this property is constant, so single buffer pool can be used here
5164 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5165 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5169 if (createFlags == 0)
5171 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5172 handle = ctxImpl.getBufferPool().allocate(total);
5174 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5176 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5177 handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
5181 CV_Assert(handle != NULL); // Unsupported, throw
5185 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5187 UMatData* u = new UMatData(this);
5192 u->allocatorFlags_ = allocatorFlags;
5193 u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
5194 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5195 u->markHostCopyObsolete(true);
5196 opencl_allocator_stats.onAllocate(u->size);
5200 bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5205 flushCleanupQueue();
5207 UMatDataAutoLock lock(u);
5211 CV_Assert(u->origdata != 0);
5212 Context& ctx = Context::getDefault();
5213 int createFlags = 0;
5214 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5215 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
5217 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
5219 cl_context ctx_handle = (cl_context)ctx.ptr();
5220 int allocatorFlags = 0;
5221 UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
5222 void* handle = NULL;
5223 cl_int retval = CL_SUCCESS;
5225 #ifdef HAVE_OPENCL_SVM
5226 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5227 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
5228 if (useSVM && svmCaps.isSupportFineGrainSystem())
5230 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
5231 tempUMatFlags = UMatData::TEMP_UMAT;
5232 handle = u->origdata;
5233 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
5235 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
5237 if (!(accessFlags & ACCESS_FAST)) // memcpy used
5239 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5241 cl_svm_mem_flags memFlags = createFlags |
5242 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5244 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5245 CV_DbgAssert(svmFns->isValid());
5247 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
5248 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
5251 cl_command_queue q = NULL;
5252 if (!isFineGrainBuffer)
5254 q = (cl_command_queue)Queue::getDefault().ptr();
5255 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
5256 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
5259 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5262 memcpy(handle, u->origdata, u->size);
5263 if (!isFineGrainBuffer)
5265 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
5266 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
5267 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5270 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
5271 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
5272 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5279 accessFlags &= ~ACCESS_FAST;
5281 tempUMatFlags = UMatData::TEMP_UMAT;
5286 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
5287 // There are OpenCL runtime issues for less aligned data
5288 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
5289 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
5290 // Avoid sharing of host memory between OpenCL buffers
5291 && !(u->originalUMatData && u->originalUMatData->handle)
5294 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
5295 u->size, u->origdata, &retval);
5296 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
5297 (long long int)u->size, u->origdata, (void*)handle).c_str());
5299 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
5301 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
5302 u->size, u->origdata, &retval);
5303 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
5304 (long long int)u->size, u->origdata, (void*)handle).c_str());
5305 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
5308 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
5309 if(!handle || retval != CL_SUCCESS)
5312 u->prevAllocator = u->currAllocator;
5313 u->currAllocator = this;
5314 u->flags |= tempUMatFlags | flags0;
5315 u->allocatorFlags_ = allocatorFlags;
5317 if (!!(accessFlags & ACCESS_WRITE))
5318 u->markHostCopyObsolete(true);
5319 opencl_allocator_stats.onAllocate(u->size);
5323 /*void sync(UMatData* u) const
5325 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5326 UMatDataAutoLock lock(u);
5328 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5330 if( u->tempCopiedUMat() )
5332 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5333 u->size, u->origdata, 0, 0, 0);
5338 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5339 (CL_MAP_READ | CL_MAP_WRITE),
5340 0, u->size, 0, 0, 0, &retval);
5341 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5344 u->markHostCopyObsolete(false);
5346 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5348 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5349 u->size, u->data, 0, 0, 0);
5353 void deallocate(UMatData* u) const CV_OVERRIDE
5358 CV_Assert(u->urefcount == 0);
5359 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5361 CV_Assert(u->handle != 0);
5362 CV_Assert(u->mapcount == 0);
5364 if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5365 addToCleanupQueue(u);
5370 void deallocate_(UMatData* u) const
5373 CV_Assert(u->handle);
5374 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5376 opencl_allocator_stats.onFree(u->size);
5380 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
5381 return; // avoid any OpenCL calls
5385 CV_Assert(u->origdata);
5386 // UMatDataAutoLock lock(u);
5388 if (u->hostCopyObsolete())
5390 #ifdef HAVE_OPENCL_SVM
5391 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5393 Context& ctx = Context::getDefault();
5394 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5395 CV_DbgAssert(svmFns->isValid());
5397 if( u->tempCopiedUMat() )
5399 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5400 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5401 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5402 cl_command_queue q = NULL;
5403 if (!isFineGrainBuffer)
5405 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5406 q = (cl_command_queue)Queue::getDefault().ptr();
5407 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5408 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5411 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5414 memcpy(u->origdata, u->handle, u->size);
5415 if (!isFineGrainBuffer)
5417 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5418 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5419 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5424 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5431 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5432 if( u->tempCopiedUMat() )
5434 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5435 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5436 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5443 CV_Assert(u->mapcount == 0);
5444 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5445 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5446 (CL_MAP_READ | CL_MAP_WRITE),
5447 0, u->size, 0, 0, 0, &retval);
5448 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5449 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5450 if (u->originalUMatData)
5452 CV_Assert(u->originalUMatData->data == data);
5454 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5455 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5456 CV_OCL_DBG_CHECK(clFinish(q));
5460 u->markHostCopyObsolete(false);
5466 #ifdef HAVE_OPENCL_SVM
5467 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5469 if( u->tempCopiedUMat() )
5471 Context& ctx = Context::getDefault();
5472 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5473 CV_DbgAssert(svmFns->isValid());
5475 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5476 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5482 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5483 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5486 u->markDeviceCopyObsolete(true);
5487 u->currAllocator = u->prevAllocator;
5488 u->prevAllocator = NULL;
5489 if(u->data && u->copyOnMap() && u->data != u->origdata)
5491 u->data = u->origdata;
5492 u->currAllocator->deallocate(u);
5497 CV_Assert(u->origdata == NULL);
5498 if(u->data && u->copyOnMap() && u->data != u->origdata)
5502 u->markHostCopyObsolete(true);
5504 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5506 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5508 ocl::Context& ctx = *pCtx.get();
5509 CV_Assert(ctx.getImpl());
5510 ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
5512 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5514 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5516 ocl::Context& ctx = *pCtx.get();
5517 CV_Assert(ctx.getImpl());
5518 ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
5520 #ifdef HAVE_OPENCL_SVM
5521 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5523 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5525 ocl::Context& ctx = *pCtx.get();
5526 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5530 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5531 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5533 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5534 CV_DbgAssert(svmFns->isValid());
5535 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5537 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5539 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5540 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5541 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5544 CV_Assert(ctx.getImpl());
5545 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5550 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5553 u->markDeviceCopyObsolete(true);
5557 CV_Assert(u == NULL);
5560 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
5561 void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5563 CV_Assert(u && u->handle);
5565 if (!!(accessFlags & ACCESS_WRITE))
5566 u->markDeviceCopyObsolete(true);
5568 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5571 if( !u->copyOnMap() )
5574 // because there can be other map requests for the same UMat with different access flags,
5575 // we use the universal (read-write) access mode.
5576 #ifdef HAVE_OPENCL_SVM
5577 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5579 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5581 Context& ctx = Context::getDefault();
5582 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5583 CV_DbgAssert(svmFns->isValid());
5585 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5587 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5588 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5591 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5592 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5596 u->data = (uchar*)u->handle;
5597 u->markHostCopyObsolete(false);
5598 u->markDeviceMemMapped(true);
5603 cl_int retval = CL_SUCCESS;
5604 if (!u->deviceMemMapped())
5606 CV_Assert(u->refcount == 1);
5607 CV_Assert(u->mapcount++ == 0);
5608 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5609 (CL_MAP_READ | CL_MAP_WRITE),
5610 0, u->size, 0, 0, 0, &retval);
5611 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5613 if (u->data && retval == CL_SUCCESS)
5615 u->markHostCopyObsolete(false);
5616 u->markDeviceMemMapped(true);
5620 // TODO Is it really a good idea and was it tested well?
5621 // if map failed, switch to copy-on-map mode for the particular buffer
5622 u->flags |= UMatData::COPY_ON_MAP;
5627 u->data = (uchar*)fastMalloc(u->size);
5628 u->markHostCopyObsolete(true);
5632 if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5634 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5635 #ifdef HAVE_OPENCL_SVM
5636 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5638 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5639 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5640 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5641 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5642 u->markHostCopyObsolete(false);
5646 void unmap(UMatData* u) const CV_OVERRIDE
5652 CV_Assert(u->handle != 0);
5654 UMatDataAutoLock autolock(u);
5656 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5658 if( !u->copyOnMap() && u->deviceMemMapped() )
5660 CV_Assert(u->data != NULL);
5661 #ifdef HAVE_OPENCL_SVM
5662 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5664 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5666 Context& ctx = Context::getDefault();
5667 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5668 CV_DbgAssert(svmFns->isValid());
5670 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5672 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5673 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5675 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5677 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5680 if (u->refcount == 0)
5682 u->markDeviceCopyObsolete(false);
5683 u->markHostCopyObsolete(true);
5687 if (u->refcount == 0)
5689 CV_Assert(u->mapcount-- == 1);
5690 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5691 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5692 if (Device::getDefault().isAMD())
5694 // required for multithreaded applications (see stitching test)
5695 CV_OCL_DBG_CHECK(clFinish(q));
5697 u->markDeviceMemMapped(false);
5699 u->markDeviceCopyObsolete(false);
5700 u->markHostCopyObsolete(true);
5703 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5705 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5706 #ifdef HAVE_OPENCL_SVM
5707 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5709 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5710 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5711 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5712 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5713 u->markDeviceCopyObsolete(false);
5714 u->markHostCopyObsolete(true);
5718 bool checkContinuous(int dims, const size_t sz[],
5719 const size_t srcofs[], const size_t srcstep[],
5720 const size_t dstofs[], const size_t dststep[],
5721 size_t& total, size_t new_sz[],
5722 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5723 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5725 bool iscontinuous = true;
5726 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5727 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5729 for( int i = dims-2; i >= 0; i-- )
5731 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5732 iscontinuous = false;
5735 srcrawofs += srcofs[i]*srcstep[i];
5737 dstrawofs += dstofs[i]*dststep[i];
5742 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5745 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5746 // we assume that new_... arrays are initialized by caller
5747 // with 0's, so there is no else branch
5750 new_srcofs[0] = srcofs[1];
5751 new_srcofs[1] = srcofs[0];
5757 new_dstofs[0] = dstofs[1];
5758 new_dstofs[1] = dstofs[0];
5762 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5763 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5767 // we could check for dims == 3 here,
5768 // but from user perspective this one is more informative
5769 CV_Assert(dims <= 3);
5770 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5773 new_srcofs[0] = srcofs[2];
5774 new_srcofs[1] = srcofs[1];
5775 new_srcofs[2] = srcofs[0];
5780 new_dstofs[0] = dstofs[2];
5781 new_dstofs[1] = dstofs[1];
5782 new_dstofs[2] = dstofs[0];
5785 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5786 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5789 return iscontinuous;
5792 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5793 const size_t srcofs[], const size_t srcstep[],
5794 const size_t dststep[]) const CV_OVERRIDE
5798 UMatDataAutoLock autolock(u);
5800 if( u->data && !u->hostCopyObsolete() )
5802 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5805 CV_Assert( u->handle != 0 );
5807 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5809 size_t total = 0, new_sz[] = {0, 0, 0};
5810 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5811 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5813 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5815 srcrawofs, new_srcofs, new_srcstep,
5816 dstrawofs, new_dstofs, new_dststep);
5818 #ifdef HAVE_OPENCL_SVM
5819 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5821 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5822 Context& ctx = Context::getDefault();
5823 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5824 CV_DbgAssert(svmFns->isValid());
5826 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5827 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5829 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5830 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5833 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5838 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5842 // This code is from MatAllocator::download()
5843 int isz[CV_MAX_DIM];
5844 uchar* srcptr = (uchar*)u->handle;
5845 for( int i = 0; i < dims; i++ )
5847 CV_Assert( sz[i] <= (size_t)INT_MAX );
5851 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5852 isz[i] = (int)sz[i];
5855 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5856 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5858 const Mat* arrays[] = { &src, &dst };
5860 NAryMatIterator it(arrays, ptrs, 2);
5861 size_t j, planesz = it.size;
5863 for( j = 0; j < it.nplanes; j++, ++it )
5864 memcpy(ptrs[1], ptrs[0], planesz);
5866 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5868 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5869 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5871 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5880 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5881 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5882 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5884 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5886 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5887 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5888 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5889 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5890 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5891 uchar* ptr = alignedPtr.getAlignedPtr();
5893 CV_Assert(new_srcstep[0] >= new_sz[0]);
5894 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5895 total = std::min(total, u->size - new_srcrawofs);
5896 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5897 new_srcrawofs, total, ptr, 0, 0, 0));
5898 for( size_t i = 0; i < new_sz[1]; i++ )
5899 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5903 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5904 uchar* ptr = alignedPtr.getAlignedPtr();
5906 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5907 new_srcofs, new_dstofs, new_sz,
5915 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5916 const size_t dstofs[], const size_t dststep[],
5917 const size_t srcstep[]) const CV_OVERRIDE
5922 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5923 CV_Assert(u->refcount == 0 || u->tempUMat());
5925 size_t total = 0, new_sz[] = {0, 0, 0};
5926 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5927 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5929 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5931 srcrawofs, new_srcofs, new_srcstep,
5932 dstrawofs, new_dstofs, new_dststep);
5934 UMatDataAutoLock autolock(u);
5936 // if there is cached CPU copy of the GPU matrix,
5937 // we could use it as a destination.
5938 // we can do it in 2 cases:
5939 // 1. we overwrite the whole content
5940 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
5941 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
5943 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
5944 u->markHostCopyObsolete(false);
5945 u->markDeviceCopyObsolete(true);
5949 CV_Assert( u->handle != 0 );
5950 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5952 #ifdef HAVE_OPENCL_SVM
5953 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5955 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5956 Context& ctx = Context::getDefault();
5957 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5958 CV_DbgAssert(svmFns->isValid());
5960 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5961 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5963 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5964 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
5967 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5972 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
5976 // This code is from MatAllocator::upload()
5977 int isz[CV_MAX_DIM];
5978 uchar* dstptr = (uchar*)u->handle;
5979 for( int i = 0; i < dims; i++ )
5981 CV_Assert( sz[i] <= (size_t)INT_MAX );
5985 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5986 isz[i] = (int)sz[i];
5989 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
5990 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5992 const Mat* arrays[] = { &src, &dst };
5994 NAryMatIterator it(arrays, ptrs, 2);
5995 size_t j, planesz = it.size;
5997 for( j = 0; j < it.nplanes; j++, ++it )
5998 memcpy(ptrs[1], ptrs[0], planesz);
6000 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6002 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6003 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6005 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6014 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6015 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6016 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6017 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6018 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6020 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6022 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6023 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6024 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6025 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6026 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6027 uchar* ptr = alignedPtr.getAlignedPtr();
6029 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6030 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
6031 total = std::min(total, u->size - new_dstrawofs);
6032 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6033 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6034 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6035 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6036 new_dstrawofs, total, ptr, 0, 0, 0));
6037 for( size_t i = 0; i < new_sz[1]; i++ )
6038 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
6039 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6040 new_dstrawofs, total, ptr, 0, 0, 0));
6044 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6045 uchar* ptr = alignedPtr.getAlignedPtr();
6047 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6048 new_dstofs, new_srcofs, new_sz,
6054 u->markHostCopyObsolete(true);
6055 #ifdef HAVE_OPENCL_SVM
6056 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6057 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6064 u->markHostCopyObsolete(true);
6066 u->markDeviceCopyObsolete(false);
6069 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6070 const size_t srcofs[], const size_t srcstep[],
6071 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6076 size_t total = 0, new_sz[] = {0, 0, 0};
6077 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6078 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6080 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6082 srcrawofs, new_srcofs, new_srcstep,
6083 dstrawofs, new_dstofs, new_dststep);
6085 UMatDataAutoLock src_autolock(src, dst);
6087 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6089 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6092 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6094 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6095 dst->markHostCopyObsolete(false);
6096 #ifdef HAVE_OPENCL_SVM
6097 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6098 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6105 dst->markDeviceCopyObsolete(true);
6110 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6111 CV_Assert(dst->refcount == 0);
6112 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6114 cl_int retval = CL_SUCCESS;
6115 #ifdef HAVE_OPENCL_SVM
6116 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6117 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6119 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6120 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6122 Context& ctx = Context::getDefault();
6123 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6124 CV_DbgAssert(svmFns->isValid());
6128 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6129 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6130 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6131 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6132 total, 0, NULL, NULL);
6133 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6138 // This code is from MatAllocator::download()/upload()
6139 int isz[CV_MAX_DIM];
6140 uchar* srcptr = (uchar*)src->handle;
6141 for( int i = 0; i < dims; i++ )
6143 CV_Assert( sz[i] <= (size_t)INT_MAX );
6147 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6148 isz[i] = (int)sz[i];
6150 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6152 uchar* dstptr = (uchar*)dst->handle;
6153 for( int i = 0; i < dims; i++ )
6156 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6158 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6160 const Mat* arrays[] = { &m_src, &m_dst };
6162 NAryMatIterator it(arrays, ptrs, 2);
6163 size_t j, planesz = it.size;
6165 for( j = 0; j < it.nplanes; j++, ++it )
6166 memcpy(ptrs[1], ptrs[0], planesz);
6171 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6173 map(src, ACCESS_READ);
6174 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6179 map(dst, ACCESS_WRITE);
6180 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6190 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6191 srcrawofs, dstrawofs, total, 0, 0, 0);
6192 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6193 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6195 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6197 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6198 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6199 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6200 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6201 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6203 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6204 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6205 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6206 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6207 uchar* srcptr = srcBuf.getAlignedPtr();
6208 uchar* dstptr = dstBuf.getAlignedPtr();
6210 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6212 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
6213 src_total = std::min(src_total, src->size - new_srcrawofs);
6214 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
6215 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
6217 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6218 new_srcrawofs, src_total, srcptr, 0, 0, 0));
6219 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6220 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6222 for( size_t i = 0; i < new_sz[1]; i++ )
6223 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
6224 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
6225 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6226 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6230 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6231 new_srcofs, new_dstofs, new_sz,
6237 if (retval == CL_SUCCESS)
6239 CV_IMPL_ADD(CV_IMPL_OCL)
6242 #ifdef HAVE_OPENCL_SVM
6243 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6244 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6251 dst->markHostCopyObsolete(true);
6253 dst->markDeviceCopyObsolete(false);
6257 CV_OCL_DBG_CHECK(clFinish(q));
6261 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6263 ocl::Context ctx = Context::getDefault();
6266 #ifdef HAVE_OPENCL_SVM
6267 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6269 return &ctx.getImpl()->getBufferPoolSVM();
6272 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
6274 return &ctx.getImpl()->getBufferPoolHostPtr();
6276 if (id != NULL && strcmp(id, "OCL") != 0)
6278 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6280 return &ctx.getImpl()->getBufferPool();
6283 MatAllocator* matStdAllocator;
6285 mutable cv::Mutex cleanupQueueMutex;
6286 mutable std::deque<UMatData*> cleanupQueue;
6288 void flushCleanupQueue() const
6290 if (!cleanupQueue.empty())
6292 std::deque<UMatData*> q;
6294 cv::AutoLock lock(cleanupQueueMutex);
6295 q.swap(cleanupQueue);
6297 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6303 void addToCleanupQueue(UMatData* u) const
6305 //TODO: Validation check: CV_Assert(!u->tempUMat());
6307 cv::AutoLock lock(cleanupQueueMutex);
6308 cleanupQueue.push_back(u);
6313 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6315 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6316 g_isOpenCVActivated = true;
6319 MatAllocator* getOpenCLAllocator()
6321 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
6324 }} // namespace cv::ocl
6329 // three funcs below are implemented in umatrix.cpp
6330 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
6331 bool autoSteps = false );
6332 void finalizeHdr(UMat& m);
6337 namespace cv { namespace ocl {
6340 // Convert OpenCL buffer memory to UMat
6342 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6345 int sizes[] = { rows, cols };
6347 CV_Assert(0 <= d && d <= CV_MAX_DIM);
6351 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6352 dst.usageFlags = USAGE_DEFAULT;
6354 setSize(dst, d, sizes, 0, true);
6357 cl_mem memobj = (cl_mem)cl_mem_buffer;
6358 cl_mem_object_type mem_type = 0;
6360 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6362 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6365 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6367 CV_OCL_CHECK(clRetainMemObject(memobj));
6369 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6370 CV_Assert(total >= rows * step);
6372 // attach clBuffer to UMatData
6373 dst.u = new UMatData(getOpenCLAllocator());
6375 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
6376 dst.u->flags = static_cast<UMatData::MemoryFlag>(0);
6377 dst.u->handle = cl_mem_buffer;
6378 dst.u->origdata = 0;
6379 dst.u->prevAllocator = 0;
6380 dst.u->size = total;
6386 } // convertFromBuffer()
6390 // Convert OpenCL image2d_t memory to UMat
6392 void convertFromImage(void* cl_mem_image, UMat& dst)
6394 cl_mem clImage = (cl_mem)cl_mem_image;
6395 cl_mem_object_type mem_type = 0;
6397 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6399 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6401 cl_image_format fmt = { 0, 0 };
6402 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6405 switch (fmt.image_channel_data_type)
6408 case CL_UNSIGNED_INT8:
6413 case CL_SIGNED_INT8:
6417 case CL_UNORM_INT16:
6418 case CL_UNSIGNED_INT16:
6422 case CL_SNORM_INT16:
6423 case CL_SIGNED_INT16:
6427 case CL_SIGNED_INT32:
6436 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6440 switch (fmt.image_channel_order)
6443 type = CV_MAKE_TYPE(depth, 1);
6449 type = CV_MAKE_TYPE(depth, 4);
6453 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6458 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6461 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6464 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6466 dst.create((int)h, (int)w, type);
6468 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
6470 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6473 size_t src_origin[3] = { 0, 0, 0 };
6474 size_t region[3] = { w, h, 1 };
6475 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6477 CV_OCL_CHECK(clFinish(q));
6480 } // convertFromImage()
6483 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6485 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6487 cl_uint numDevices = 0;
6488 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6489 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6491 CV_OCL_DBG_CHECK_RESULT(status,
6492 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6495 if (numDevices == 0)
6501 devices.resize((size_t)numDevices);
6502 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6505 struct PlatformInfo::Impl
6510 handle = *(cl_platform_id*)id;
6511 getDevices(devices, handle);
6514 String getStrProp(cl_platform_info prop) const
6518 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6519 sz < sizeof(buf) ? String(buf) : String();
6522 IMPLEMENT_REFCOUNTABLE();
6523 std::vector<cl_device_id> devices;
6524 cl_platform_id handle;
6527 PlatformInfo::PlatformInfo()
6532 PlatformInfo::PlatformInfo(void* platform_id)
6534 p = new Impl(platform_id);
6537 PlatformInfo::~PlatformInfo()
6543 PlatformInfo::PlatformInfo(const PlatformInfo& i)
6550 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6563 int PlatformInfo::deviceNumber() const
6565 return p ? (int)p->devices.size() : 0;
6568 void PlatformInfo::getDevice(Device& device, int d) const
6570 CV_Assert(p && d < (int)p->devices.size() );
6572 device.set(p->devices[d]);
6575 String PlatformInfo::name() const
6577 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6580 String PlatformInfo::vendor() const
6582 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6585 String PlatformInfo::version() const
6587 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
6590 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6592 cl_uint numPlatforms = 0;
6593 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6595 if (numPlatforms == 0)
6601 platforms.resize((size_t)numPlatforms);
6602 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6605 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6607 std::vector<cl_platform_id> platforms;
6608 getPlatforms(platforms);
6610 for (size_t i = 0; i < platforms.size(); i++)
6611 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6614 const char* typeToStr(int type)
6616 static const char* tab[]=
6618 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6619 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6620 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6621 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6622 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6623 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6624 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6625 "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6628 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6629 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6634 const char* memopTypeToStr(int type)
6636 static const char* tab[] =
6638 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6639 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6640 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6641 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6642 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6643 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6644 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6645 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6648 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6649 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6654 const char* vecopTypeToStr(int type)
6656 static const char* tab[] =
6658 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6659 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6660 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6661 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6662 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6663 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6664 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6665 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6668 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6669 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6674 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6676 if( sdepth == ddepth )
6678 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6679 if( ddepth >= CV_32F ||
6680 (ddepth == CV_32S && sdepth < CV_32S) ||
6681 (ddepth == CV_16S && sdepth <= CV_8S) ||
6682 (ddepth == CV_16U && sdepth == CV_8U))
6684 sprintf(buf, "convert_%s", typestr);
6686 else if( sdepth >= CV_32F )
6687 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6689 sprintf(buf, "convert_%s_sat", typestr);
6694 const char* getOpenCLErrorString(int errorCode)
6696 #define CV_OCL_CODE(id) case id: return #id
6697 #define CV_OCL_CODE_(id, name) case id: return #name
6700 CV_OCL_CODE(CL_SUCCESS);
6701 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6702 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6703 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6704 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6705 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6706 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6707 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6708 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6709 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6710 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6711 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6712 CV_OCL_CODE(CL_MAP_FAILURE);
6713 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6714 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6715 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6716 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6717 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6718 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6719 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6720 CV_OCL_CODE(CL_INVALID_VALUE);
6721 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6722 CV_OCL_CODE(CL_INVALID_PLATFORM);
6723 CV_OCL_CODE(CL_INVALID_DEVICE);
6724 CV_OCL_CODE(CL_INVALID_CONTEXT);
6725 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6726 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6727 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6728 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6729 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6730 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6731 CV_OCL_CODE(CL_INVALID_SAMPLER);
6732 CV_OCL_CODE(CL_INVALID_BINARY);
6733 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6734 CV_OCL_CODE(CL_INVALID_PROGRAM);
6735 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6736 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6737 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6738 CV_OCL_CODE(CL_INVALID_KERNEL);
6739 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6740 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6741 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6742 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6743 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6744 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6745 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6746 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6747 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6748 CV_OCL_CODE(CL_INVALID_EVENT);
6749 CV_OCL_CODE(CL_INVALID_OPERATION);
6750 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6751 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6752 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6753 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6755 CV_OCL_CODE(CL_INVALID_PROPERTY);
6757 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6758 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6759 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6760 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6762 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6763 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6765 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6766 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6767 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6768 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6769 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6770 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6771 default: return "Unknown OpenCL error";
6777 template <typename T>
6778 static std::string kerToStr(const Mat & k)
6780 int width = k.cols - 1, depth = k.depth();
6781 const T * const data = k.ptr<T>();
6783 std::ostringstream stream;
6784 stream.precision(10);
6788 for (int i = 0; i < width; ++i)
6789 stream << "DIG(" << (int)data[i] << ")";
6790 stream << "DIG(" << (int)data[width] << ")";
6792 else if (depth == CV_32F)
6794 stream.setf(std::ios_base::showpoint);
6795 for (int i = 0; i < width; ++i)
6796 stream << "DIG(" << data[i] << "f)";
6797 stream << "DIG(" << data[width] << "f)";
6801 for (int i = 0; i < width; ++i)
6802 stream << "DIG(" << data[i] << ")";
6803 stream << "DIG(" << data[width] << ")";
6806 return stream.str();
6809 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6811 Mat kernel = _kernel.getMat().reshape(1, 1);
6813 int depth = kernel.depth();
6817 if (ddepth != depth)
6818 kernel.convertTo(kernel, ddepth);
6820 typedef std::string (* func_t)(const Mat &);
6821 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6822 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6823 const func_t func = funcs[ddepth];
6824 CV_Assert(func != 0);
6826 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6829 #define PROCESS_SRC(src) \
6834 CV_Assert(src.isMat() || src.isUMat()); \
6835 Size csize = src.size(); \
6836 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6837 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6838 if (cwidth < ckercn || ckercn <= 0) \
6840 cols.push_back(cwidth); \
6841 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6843 offsets.push_back(src.offset()); \
6844 steps.push_back(src.step()); \
6845 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6846 kercns.push_back(ckercn); \
6851 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6852 InputArray src4, InputArray src5, InputArray src6,
6853 InputArray src7, InputArray src8, InputArray src9,
6854 OclVectorStrategy strat)
6856 const ocl::Device & d = ocl::Device::getDefault();
6858 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6859 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6860 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6861 d.preferredVectorWidthDouble(), -1 };
6863 // if the device says don't use vectors
6864 if (vectorWidths[0] == 1)
6867 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6868 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6869 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6872 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6875 int checkOptimalVectorWidth(const int *vectorWidths,
6876 InputArray src1, InputArray src2, InputArray src3,
6877 InputArray src4, InputArray src5, InputArray src6,
6878 InputArray src7, InputArray src8, InputArray src9,
6879 OclVectorStrategy strat)
6881 CV_Assert(vectorWidths);
6883 int ref_type = src1.type();
6885 std::vector<size_t> offsets, steps, cols;
6886 std::vector<int> dividers, kercns;
6897 size_t size = offsets.size();
6899 for (size_t i = 0; i < size; ++i)
6900 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6901 dividers[i] >>= 1, kercns[i] >>= 1;
6904 int kercn = *std::min_element(kercns.begin(), kercns.end());
6909 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6910 InputArray src4, InputArray src5, InputArray src6,
6911 InputArray src7, InputArray src8, InputArray src9)
6913 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
6919 // TODO Make this as a method of OpenCL "BuildOptions" class
6920 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
6922 if (!buildOptions.empty())
6923 buildOptions += " ";
6924 int type = _m.type(), depth = CV_MAT_DEPTH(type);
6925 buildOptions += format(
6926 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
6927 name.c_str(), ocl::typeToStr(type),
6928 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
6929 name.c_str(), (int)CV_MAT_CN(type),
6930 name.c_str(), (int)CV_ELEM_SIZE(type),
6931 name.c_str(), (int)CV_ELEM_SIZE1(type),
6932 name.c_str(), (int)depth
6937 struct Image2D::Impl
6939 Impl(const UMat &src, bool norm, bool alias)
6943 init(src, norm, alias);
6949 clReleaseMemObject(handle);
6952 static cl_image_format getImageFormat(int depth, int cn, bool norm)
6954 cl_image_format format;
6955 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
6956 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
6957 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
6958 CL_SNORM_INT16, -1, -1, -1, -1 };
6959 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
6961 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
6962 int channelOrder = channelOrders[cn];
6963 format.image_channel_data_type = (cl_channel_type)channelType;
6964 format.image_channel_order = (cl_channel_order)channelOrder;
6968 static bool isFormatSupported(cl_image_format format)
6971 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6973 cl_context context = (cl_context)Context::getDefault().ptr();
6977 // Figure out how many formats are supported by this context.
6978 cl_uint numFormats = 0;
6979 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6980 CL_MEM_OBJECT_IMAGE2D, numFormats,
6982 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
6985 AutoBuffer<cl_image_format> formats(numFormats);
6986 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6987 CL_MEM_OBJECT_IMAGE2D, numFormats,
6988 formats.data(), NULL);
6989 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
6990 for (cl_uint i = 0; i < numFormats; ++i)
6992 if (!memcmp(&formats[i], &format, sizeof(format)))
7001 void init(const UMat &src, bool norm, bool alias)
7004 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7006 CV_Assert(!src.empty());
7007 CV_Assert(ocl::Device::getDefault().imageSupport());
7009 int err, depth = src.depth(), cn = src.channels();
7011 cl_image_format format = getImageFormat(depth, cn, norm);
7013 if (!isFormatSupported(format))
7014 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7016 if (alias && !src.handle(ACCESS_RW))
7017 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7019 cl_context context = (cl_context)Context::getDefault().ptr();
7020 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7022 #ifdef CL_VERSION_1_2
7023 // this enables backwards portability to
7024 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7025 const Device & d = ocl::Device::getDefault();
7026 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7027 CV_Assert(!alias || canCreateAlias(src));
7028 if (1 < major || (1 == major && 2 <= minor))
7031 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
7032 desc.image_width = src.cols;
7033 desc.image_height = src.rows;
7034 desc.image_depth = 0;
7035 desc.image_array_size = 1;
7036 desc.image_row_pitch = alias ? src.step[0] : 0;
7037 desc.image_slice_pitch = 0;
7038 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
7039 desc.num_mip_levels = 0;
7040 desc.num_samples = 0;
7041 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7046 CV_SUPPRESS_DEPRECATED_START
7047 CV_Assert(!alias); // This is an OpenCL 1.2 extension
7048 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7049 CV_SUPPRESS_DEPRECATED_END
7051 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7053 size_t origin[] = { 0, 0, 0 };
7054 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7057 if (!alias && !src.isContinuous())
7059 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7060 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7061 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7064 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7065 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7066 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7067 CV_OCL_DBG_CHECK(clFlush(queue));
7071 devData = (cl_mem)src.handle(ACCESS_READ);
7073 CV_Assert(devData != NULL);
7077 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7078 if (!src.isContinuous())
7080 CV_OCL_DBG_CHECK(clFlush(queue));
7081 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7086 IMPLEMENT_REFCOUNTABLE();
7096 Image2D::Image2D(const UMat &src, bool norm, bool alias)
7098 p = new Impl(src, norm, alias);
7101 bool Image2D::canCreateAlias(const UMat &m)
7104 const Device & d = ocl::Device::getDefault();
7105 if (d.imageFromBufferSupport() && !m.empty())
7107 // This is the required pitch alignment in pixels
7108 uint pitchAlign = d.imagePitchAlignment();
7109 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7111 // We don't currently handle the case where the buffer was created
7112 // with CL_MEM_USE_HOST_PTR
7113 if (!m.u->tempUMat())
7122 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7124 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7126 return Impl::isFormatSupported(format);
7129 Image2D::Image2D(const Image2D & i)
7136 Image2D & Image2D::operator = (const Image2D & i)
7155 void* Image2D::ptr() const
7157 return p ? p->handle : 0;
7160 bool internal::isOpenCLForced()
7162 static bool initialized = false;
7163 static bool value = false;
7166 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
7172 bool internal::isPerformanceCheckBypassed()
7174 static bool initialized = false;
7175 static bool value = false;
7178 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
7184 bool internal::isCLBuffer(UMat& u)
7186 void* h = u.handle(ACCESS_RW);
7189 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7191 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7194 cl_mem_object_type type = 0;
7195 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7196 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7206 Impl(const Queue& q)
7215 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7221 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7225 uint64 durationNS() const
7227 return (uint64)(timer.getTimeSec() * 1e9);
7233 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
7234 Timer::~Timer() { delete p; }
7248 uint64 Timer::durationNS() const
7251 return p->durationNS();
7256 #endif // HAVE_OPENCL