1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
117 #include "directx.hpp"
120 #ifdef HAVE_OPENCL_SVM
121 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
122 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
123 #include "opencv2/core/opencl/opencl_svm.hpp"
126 #include "umatrix.hpp"
128 namespace cv { namespace ocl {
130 #define IMPLEMENT_REFCOUNTABLE() \
131 void addref() { CV_XADD(&refcount, 1); } \
132 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
135 static cv::utils::AllocatorStatistics opencl_allocator_stats;
137 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
138 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
140 return opencl_allocator_stats;
144 static bool isRaiseError()
146 static bool initialized = false;
147 static bool value = false;
150 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
157 #if CV_OPENCL_TRACE_CHECK
159 void traceOpenCLCheck(cl_int status, const char* message)
161 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
163 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
165 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
168 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
169 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
171 #define CV_OCL_CHECK_RESULT(check_result, msg) \
173 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
174 if (check_result != CL_SUCCESS) \
176 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_CHECK_RESULT must be const char*"); \
177 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
178 CV_Error(Error::OpenCLApiCallError, error_msg); \
182 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
184 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
188 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
189 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
191 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
193 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
194 if (check_result != CL_SUCCESS && isRaiseError()) \
196 static_assert(std::is_convertible<decltype(msg), const char*>::value, "msg of CV_OCL_DBG_CHECK_RESULT must be const char*"); \
197 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
198 CV_Error(Error::OpenCLApiCallError, error_msg); \
201 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
202 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
206 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
207 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
208 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
209 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
211 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
212 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
215 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
216 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
224 static const String getBuildExtraOptions()
226 static String param_buildExtraOptions;
227 static bool initialized = false;
230 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
232 if (!param_buildExtraOptions.empty())
233 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
235 return param_buildExtraOptions;
238 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
239 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
244 UMat2D(const UMat& m)
246 offset = (int)m.offset;
259 UMat3D(const UMat& m)
261 offset = (int)m.offset;
262 step = (int)m.step.p[1];
263 slicestep = (int)m.step.p[0];
264 slices = (int)m.size.p[0];
276 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
277 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
279 static uint64 table[256];
280 static bool initialized = false;
284 for( int i = 0; i < 256; i++ )
287 for( int j = 0; j < 8; j++ )
288 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
295 for( size_t idx = 0; idx < size; idx++ )
296 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
301 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
302 struct OpenCLBinaryCacheConfigurator
304 cv::String cache_path_;
305 cv::String cache_lock_filename_;
306 cv::Ptr<utils::fs::FileLock> cache_lock_;
308 typedef std::map<std::string, std::string> ContextCacheType;
309 ContextCacheType prepared_contexts_;
310 Mutex mutex_prepared_contexts_;
312 OpenCLBinaryCacheConfigurator()
314 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
315 if (!CV_OPENCL_CACHE_ENABLE)
317 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
320 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
321 if (cache_path_.empty())
323 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
329 if (cache_path_.empty())
331 if (cache_path_ == "disabled")
333 if (!utils::fs::createDirectories(cache_path_))
335 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
340 if (CV_OPENCL_CACHE_LOCK_ENABLE)
342 cache_lock_filename_ = cache_path_ + ".lock";
343 if (!utils::fs::exists(cache_lock_filename_))
345 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
346 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
347 if (!lock_filename.is_open())
349 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
356 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
359 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
361 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
363 catch (const cv::Exception& e)
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
369 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
374 if (CV_OPENCL_CACHE_WRITE)
376 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
377 "(not safe for multiprocess environment)");
381 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
385 catch (const cv::Exception& e)
387 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
391 if (!cache_path_.empty())
393 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
395 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
396 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
400 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
408 cache_lock_filename_.clear();
409 cache_lock_.release();
412 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
413 const std::string& cleanup_prefix)
415 if (cache_path_.empty())
416 return std::string();
418 AutoLock lock(mutex_prepared_contexts_);
420 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
421 if (found_it != prepared_contexts_.end())
422 return found_it->second;
424 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
426 std::string target_directory = cache_path_ + ctx_prefix + "/";
427 bool result = utils::fs::isDirectory(target_directory);
432 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
433 if (utils::fs::createDirectories(target_directory))
439 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
442 catch (const cv::Exception& e)
444 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
447 target_directory = result ? target_directory : std::string();
448 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
450 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
454 std::vector<String> entries;
455 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
456 std::vector<String> remove_entries;
457 for (size_t i = 0; i < entries.size(); i++)
459 const String& name = entries[i];
460 if (0 == name.find(cleanup_prefix))
462 if (0 == name.find(ctx_prefix))
463 continue; // skip current
464 remove_entries.push_back(name);
467 if (!remove_entries.empty())
469 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
470 ? "Detected OpenCL cache directory for other version of OpenCL device."
471 : "Detected OpenCL cache directories for other versions of OpenCL device.")
472 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
473 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
474 for (size_t i = 0; i < remove_entries.size(); i++)
476 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
478 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
480 for (size_t i = 0; i < remove_entries.size(); i++)
482 const String& name = remove_entries[i];
483 cv::String path = utils::fs::join(cache_path_, name);
486 utils::fs::remove_all(path);
487 CV_LOG_WARNING(NULL, "Removed: " << path);
489 catch (const cv::Exception& e)
491 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
498 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
502 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
503 return target_directory;
506 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
508 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
511 class BinaryProgramFile
513 enum { MAX_ENTRIES = 64 };
515 typedef unsigned int uint32_t;
517 struct CV_DECL_ALIGNED(4) FileHeader
519 uint32_t sourceSignatureSize;
520 //char sourceSignature[];
523 struct CV_DECL_ALIGNED(4) FileTable
525 uint32_t numberOfEntries;
526 //uint32_t firstEntryOffset[];
529 struct CV_DECL_ALIGNED(4) FileEntry
531 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
538 const std::string fileName_;
539 const char* const sourceSignature_;
540 const size_t sourceSignatureSize_;
544 uint32_t entryOffsets[MAX_ENTRIES];
546 uint32_t getHash(const std::string& options)
548 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
549 return hash & (MAX_ENTRIES - 1);
552 inline size_t getFileSize()
554 size_t pos = (size_t)f.tellg();
555 f.seekg(0, std::fstream::end);
556 size_t fileSize = (size_t)f.tellg();
557 f.seekg(pos, std::fstream::beg);
560 inline uint32_t readUInt32()
563 f.read((char*)&res, sizeof(uint32_t));
564 CV_Assert(!f.fail());
567 inline void writeUInt32(const uint32_t value)
570 f.write((char*)&v, sizeof(uint32_t));
571 CV_Assert(!f.fail());
574 inline void seekReadAbsolute(size_t pos)
576 f.seekg(pos, std::fstream::beg);
577 CV_Assert(!f.fail());
579 inline void seekReadRelative(size_t pos)
581 f.seekg(pos, std::fstream::cur);
582 CV_Assert(!f.fail());
585 inline void seekWriteAbsolute(size_t pos)
587 f.seekp(pos, std::fstream::beg);
588 CV_Assert(!f.fail());
594 if (0 != remove(fileName_.c_str()))
595 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
600 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
601 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
603 CV_StaticAssert(sizeof(uint32_t) == 4, "");
604 CV_Assert(sourceSignature_ != NULL);
605 CV_Assert(sourceSignatureSize_ > 0);
606 memset(entryOffsets, 0, sizeof(entryOffsets));
608 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
609 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
610 if(f.is_open() && getFileSize() > 0)
612 bool isValid = false;
615 uint32_t fileSourceSignatureSize = readUInt32();
616 if (fileSourceSignatureSize == sourceSignatureSize_)
618 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
619 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
622 CV_LOG_ERROR(NULL, "Unexpected EOF");
624 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
631 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
634 catch (const cv::Exception& e)
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
640 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
653 bool read(const std::string& key, std::vector<char>& buf)
658 size_t fileSize = getFileSize();
661 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
668 uint32_t fileSourceSignatureSize = readUInt32();
669 CV_Assert(fileSourceSignatureSize > 0);
670 seekReadRelative(fileSourceSignatureSize);
672 uint32_t numberOfEntries = readUInt32();
673 CV_Assert(numberOfEntries > 0);
674 if (numberOfEntries != MAX_ENTRIES)
676 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
680 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
681 CV_Assert(!f.fail());
683 uint32_t entryNum = getHash(key);
685 uint32_t entryOffset = entryOffsets[entryNum];
687 while (entryOffset > 0)
689 seekReadAbsolute(entryOffset);
690 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
691 f.read((char*)&entry, sizeof(entry));
692 CV_Assert(!f.fail());
693 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
694 if (key.size() == entry.keySize)
696 if (entry.keySize > 0)
698 f.read(fileKey.data(), entry.keySize);
699 CV_Assert(!f.fail());
701 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
703 buf.resize(entry.dataSize);
704 f.read(&buf[0], entry.dataSize);
705 CV_Assert(!f.fail());
707 CV_LOG_VERBOSE(NULL, 0, "Read...");
711 if (entry.nextEntryFileOffset == 0)
713 entryOffset = entry.nextEntryFileOffset;
718 bool write(const std::string& key, std::vector<char>& buf)
722 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
725 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
728 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
734 size_t fileSize = getFileSize();
738 seekWriteAbsolute(0);
739 writeUInt32((uint32_t)sourceSignatureSize_);
740 f.write(sourceSignature_, sourceSignatureSize_);
741 CV_Assert(!f.fail());
743 writeUInt32(MAX_ENTRIES);
744 memset(entryOffsets, 0, sizeof(entryOffsets));
745 f.write((char*)entryOffsets, sizeof(entryOffsets));
746 CV_Assert(!f.fail());
748 CV_Assert(!f.fail());
750 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
751 CV_Assert(f.is_open());
752 fileSize = getFileSize();
757 uint32_t fileSourceSignatureSize = readUInt32();
758 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
759 seekReadRelative(fileSourceSignatureSize);
761 uint32_t numberOfEntries = readUInt32();
762 CV_Assert(numberOfEntries > 0);
763 if (numberOfEntries != MAX_ENTRIES)
765 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
769 size_t tableEntriesOffset = (size_t)f.tellg();
770 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
771 CV_Assert(!f.fail());
773 uint32_t entryNum = getHash(key);
775 uint32_t entryOffset = entryOffsets[entryNum];
777 while (entryOffset > 0)
779 seekReadAbsolute(entryOffset);
780 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
781 f.read((char*)&entry, sizeof(entry));
782 CV_Assert(!f.fail());
783 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
784 if (key.size() == entry.keySize)
786 if (entry.keySize > 0)
788 f.read(fileKey.data(), entry.keySize);
789 CV_Assert(!f.fail());
791 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
794 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
798 if (entry.nextEntryFileOffset == 0)
800 entryOffset = entry.nextEntryFileOffset;
805 seekWriteAbsolute(entryOffset);
806 entry.nextEntryFileOffset = (uint32_t)fileSize;
807 f.write((char*)&entry, sizeof(entry));
808 CV_Assert(!f.fail());
812 entryOffsets[entryNum] = (uint32_t)fileSize;
813 seekWriteAbsolute(tableEntriesOffset);
814 f.write((char*)entryOffsets, sizeof(entryOffsets));
815 CV_Assert(!f.fail());
817 seekWriteAbsolute(fileSize);
818 entry.nextEntryFileOffset = 0;
819 entry.dataSize = (uint32_t)buf.size();
820 entry.keySize = (uint32_t)key.size();
821 f.write((char*)&entry, sizeof(entry));
822 CV_Assert(!f.fail());
823 f.write(key.c_str(), entry.keySize);
824 CV_Assert(!f.fail());
825 f.write(&buf[0], entry.dataSize);
826 CV_Assert(!f.fail());
828 CV_Assert(!f.fail());
829 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
833 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
837 struct OpenCLExecutionContext::Impl
839 ocl::Context context_;
840 int device_; // device index in context
847 void _init_device(cl_device_id deviceID)
850 int ndevices = (int)context_.ndevices();
851 CV_Assert(ndevices > 0);
853 for (int i = 0; i < ndevices; i++)
855 ocl::Device d = context_.device(i);
856 cl_device_id dhandle = (cl_device_id)d.ptr();
857 if (dhandle == deviceID)
864 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
867 void _init_device(const ocl::Device& device)
869 CV_Assert(device.ptr());
870 int ndevices = (int)context_.ndevices();
871 CV_Assert(ndevices > 0);
873 for (int i = 0; i < ndevices; i++)
875 ocl::Device d = context_.device(i);
876 if (d.getImpl() == device.getImpl())
883 CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
887 Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
888 : device_(0), useOpenCL_(-1)
890 CV_UNUSED(platformID);
894 context_ = Context::fromHandle(context);
895 _init_device(deviceID);
896 queue_ = Queue(context_, context_.device(device_));
899 Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
900 : device_(0), useOpenCL_(-1)
902 CV_Assert(context.ptr());
903 CV_Assert(device.ptr());
906 _init_device(device);
910 Impl(const ocl::Context& context, const ocl::Device& device)
911 : device_(0), useOpenCL_(-1)
913 CV_Assert(context.ptr());
914 CV_Assert(device.ptr());
917 _init_device(device);
918 queue_ = Queue(context_, context_.device(device_));
921 Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
929 Impl(const Impl& other)
930 : context_(other.context_)
931 , device_(other.device_)
932 , queue_(other.queue_)
938 inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
946 if (!context_.empty() && context_.ndevices() > 0)
948 const Device& d = context_.device(device_);
949 useOpenCL_ = d.available();
952 catch (const cv::Exception&)
957 CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
959 return useOpenCL_ > 0;
962 void setUseOpenCL(bool flag)
970 static const std::shared_ptr<Impl>& getInitializedExecutionContext()
974 CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
976 static bool initialized = false;
977 static std::shared_ptr<Impl> g_primaryExecutionContext;
981 cv::AutoLock lock(getInitializationMutex());
984 CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
987 Context c = ocl::Context::create(std::string());
991 auto& d = c.device(deviceId);
994 auto q = ocl::Queue(c, d);
997 CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
1001 g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
1002 CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
1007 CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
1012 CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
1015 catch (const std::exception& e)
1017 CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
1021 CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
1026 return g_primaryExecutionContext;
1030 Context& OpenCLExecutionContext::getContext() const
1035 Device& OpenCLExecutionContext::getDevice() const
1038 return p->context_.device(p->device_);
1040 Queue& OpenCLExecutionContext::getQueue() const
1046 bool OpenCLExecutionContext::useOpenCL() const
1049 return p->useOpenCL();
1052 void OpenCLExecutionContext::setUseOpenCL(bool flag)
1055 p->setUseOpenCL(flag);
1059 OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
1061 CV_TRACE_FUNCTION();
1062 CoreTLSData& data = getCoreTlsData();
1063 OpenCLExecutionContext& c = data.oclExecutionContext;
1064 if (!data.oclExecutionContextInitialized)
1066 data.oclExecutionContextInitialized = true;
1067 if (c.empty() && haveOpenCL())
1068 c.p = Impl::getInitializedExecutionContext();
1074 OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
1076 CV_TRACE_FUNCTION();
1077 CoreTLSData& data = getCoreTlsData();
1078 OpenCLExecutionContext& c = data.oclExecutionContext;
1082 void OpenCLExecutionContext::bind() const
1084 CV_TRACE_FUNCTION();
1086 CoreTLSData& data = getCoreTlsData();
1087 data.oclExecutionContext = *this;
1088 data.oclExecutionContextInitialized = true;
1089 data.useOpenCL = p->useOpenCL_; // propagate "-1", avoid call useOpenCL()
1093 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
1095 CV_TRACE_FUNCTION();
1097 const Queue q(getContext(), getDevice());
1098 return cloneWithNewQueue(q);
1101 OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
1103 CV_TRACE_FUNCTION();
1105 CV_Assert(q.ptr() != NULL);
1106 OpenCLExecutionContext c;
1107 c.p = std::make_shared<Impl>(p->context_, p->device_, q);
1112 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
1114 CV_TRACE_FUNCTION();
1116 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1118 CV_Assert(!context.empty());
1119 CV_Assert(context.ptr());
1120 CV_Assert(!device.empty());
1121 CV_Assert(device.ptr());
1122 OpenCLExecutionContext ctx;
1123 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
1129 OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
1131 CV_TRACE_FUNCTION();
1133 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
1135 CV_Assert(!context.empty());
1136 CV_Assert(context.ptr());
1137 CV_Assert(!device.empty());
1138 CV_Assert(device.ptr());
1139 OpenCLExecutionContext ctx;
1140 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
1145 void OpenCLExecutionContext::release()
1147 CV_TRACE_FUNCTION();
1152 // true if we have initialized OpenCL subsystem with available platforms
1153 static bool g_isOpenCLActivated = false;
1157 CV_TRACE_FUNCTION();
1158 static bool g_isOpenCLInitialized = false;
1159 static bool g_isOpenCLAvailable = false;
1161 if (!g_isOpenCLInitialized)
1163 CV_TRACE_REGION("Init_OpenCL_Runtime");
1164 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
1167 if (cv::String(envPath) == "disabled")
1169 g_isOpenCLAvailable = false;
1170 g_isOpenCLInitialized = true;
1175 cv::AutoLock lock(getInitializationMutex());
1176 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
1180 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
1181 g_isOpenCLActivated = n > 0;
1182 CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
1186 g_isOpenCLAvailable = false;
1188 g_isOpenCLInitialized = true;
1190 return g_isOpenCLAvailable;
1195 CoreTLSData& data = getCoreTlsData();
1196 if (data.useOpenCL < 0)
1203 auto c = OpenCLExecutionContext::getCurrent();
1204 data.useOpenCL = c.useOpenCL();
1209 CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
1212 return data.useOpenCL > 0;
1215 bool isOpenCLActivated()
1217 if (!g_isOpenCLActivated)
1218 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
1222 void setUseOpenCL(bool flag)
1224 CV_TRACE_FUNCTION();
1226 CoreTLSData& data = getCoreTlsData();
1227 auto& c = OpenCLExecutionContext::getCurrentRef();
1230 c.setUseOpenCL(flag);
1231 data.useOpenCL = c.useOpenCL();
1238 data.useOpenCL = -1; // enabled by default (if context is not initialized)
1244 #ifdef HAVE_CLAMDBLAS
1249 static AmdBlasHelper & getInstance()
1251 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
1254 bool isAvailable() const
1256 return g_isAmdBlasAvailable;
1263 clAmdBlasTeardown();
1271 if (!g_isAmdBlasInitialized)
1273 AutoLock lock(getInitializationMutex());
1275 if (!g_isAmdBlasInitialized)
1281 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
1285 g_isAmdBlasAvailable = false;
1289 g_isAmdBlasAvailable = false;
1291 g_isAmdBlasInitialized = true;
1297 static bool g_isAmdBlasInitialized;
1298 static bool g_isAmdBlasAvailable;
1301 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
1302 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
1306 return AmdBlasHelper::getInstance().isAvailable();
1318 #ifdef HAVE_CLAMDFFT
1323 static AmdFftHelper & getInstance()
1325 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
1328 bool isAvailable() const
1330 return g_isAmdFftAvailable;
1337 // clAmdFftTeardown();
1345 if (!g_isAmdFftInitialized)
1347 AutoLock lock(getInitializationMutex());
1349 if (!g_isAmdFftInitialized)
1355 cl_uint major, minor, patch;
1356 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1358 // it throws exception in case AmdFft binaries are not found
1359 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1360 g_isAmdFftAvailable = true;
1362 catch (const Exception &)
1364 g_isAmdFftAvailable = false;
1368 g_isAmdFftAvailable = false;
1370 g_isAmdFftInitialized = true;
1376 static clAmdFftSetupData setupData;
1377 static bool g_isAmdFftInitialized;
1378 static bool g_isAmdFftAvailable;
1381 clAmdFftSetupData AmdFftHelper::setupData;
1382 bool AmdFftHelper::g_isAmdFftAvailable = false;
1383 bool AmdFftHelper::g_isAmdFftInitialized = false;
1387 return AmdFftHelper::getInstance().isAvailable();
1401 #ifdef HAVE_OPENCL_SVM
1410 Queue::getDefault().finish();
1413 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1415 struct Platform::Impl
1421 initialized = false;
1430 //cl_uint num_entries
1432 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1438 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1440 vendor = String(buf);
1447 IMPLEMENT_REFCOUNTABLE();
1449 cl_platform_id handle;
1454 Platform::Platform()
1459 Platform::~Platform()
1465 Platform::Platform(const Platform& pl)
1472 Platform& Platform::operator = (const Platform& pl)
1474 Impl* newp = (Impl*)pl.p;
1483 void* Platform::ptr() const
1485 return p ? p->handle : 0;
1488 Platform& Platform::getDefault()
1490 CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
1500 /////////////////////////////////////// Device ////////////////////////////////////////////
1502 // Version has format:
1503 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1505 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1506 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1507 // https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
1508 // https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
1509 static void parseOpenCLVersion(const String &version, int &major, int &minor)
1512 if (10 >= version.length())
1514 const char *pstr = version.c_str();
1515 if (0 != strncmp(pstr, "OpenCL ", 7))
1517 size_t ppos = version.find('.', 7);
1518 if (String::npos == ppos)
1520 String temp = version.substr(7, ppos - 7);
1521 major = atoi(temp.c_str());
1522 temp = version.substr(ppos + 1);
1523 minor = atoi(temp.c_str());
1534 cl_device_id device = (cl_device_id)d;
1536 CV_OCL_CHECK(clRetainDevice(device)); // increment reference counter on success only
1544 void _init(cl_device_id d)
1546 handle = (cl_device_id)d;
1548 name_ = getStrProp(CL_DEVICE_NAME);
1549 version_ = getStrProp(CL_DEVICE_VERSION);
1550 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1551 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1552 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1553 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1554 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1555 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1556 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1557 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1559 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1560 parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1563 while (pos < extensions_.size())
1565 size_t pos2 = extensions_.find(' ', pos);
1566 if (pos2 == String::npos)
1567 pos2 = extensions_.size();
1570 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1571 extensions_set_.insert(extensionName);
1576 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1578 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1579 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1580 vendorName_ == "AMD")
1581 vendorID_ = VENDOR_AMD;
1582 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1583 vendorID_ = VENDOR_INTEL;
1584 else if (vendorName_ == "NVIDIA Corporation")
1585 vendorID_ = VENDOR_NVIDIA;
1587 vendorID_ = UNKNOWN_VENDOR;
1589 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1590 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1592 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1593 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1594 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1595 maxWorkGroupSize_ = new_maxWorkGroupSize;
1598 if (isExtensionSupported("cl_khr_spir"))
1600 #ifndef CL_DEVICE_SPIR_VERSIONS
1601 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1603 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1604 std::cout << spir_versions << std::endl;
1612 if (!cv::__termination)
1617 CV_OCL_CHECK(clReleaseDevice(handle));
1623 template<typename _TpCL, typename _TpOut>
1624 _TpOut getProp(cl_device_info prop) const
1629 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1630 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1633 bool getBoolProp(cl_device_info prop) const
1635 cl_bool temp = CL_FALSE;
1638 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1639 sz == sizeof(temp) ? temp != 0 : false;
1642 String getStrProp(cl_device_info prop) const
1646 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1647 sz < sizeof(buf) ? String(buf) : String();
1650 bool isExtensionSupported(const std::string& extensionName) const
1652 return extensions_set_.count(extensionName) > 0;
1656 IMPLEMENT_REFCOUNTABLE();
1658 cl_device_id handle;
1662 std::string extensions_;
1663 int doubleFPConfig_;
1664 bool hostUnifiedMemory_;
1665 int maxComputeUnits_;
1666 size_t maxWorkGroupSize_;
1669 int deviceVersionMajor_;
1670 int deviceVersionMinor_;
1671 String driverVersion_;
1674 bool intelSubgroupsSupport_;
1676 std::set<std::string> extensions_set_;
1685 Device::Device(void* d)
1691 Device::Device(const Device& d)
1698 Device& Device::operator = (const Device& d)
1700 Impl* newp = (Impl*)d.p;
1715 void Device::set(void* d)
1722 CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
1726 Device Device::fromHandle(void* d)
1732 void* Device::ptr() const
1734 return p ? p->handle : 0;
1737 String Device::name() const
1738 { return p ? p->name_ : String(); }
1740 String Device::extensions() const
1741 { return p ? String(p->extensions_) : String(); }
1743 bool Device::isExtensionSupported(const String& extensionName) const
1744 { return p ? p->isExtensionSupported(extensionName) : false; }
1746 String Device::version() const
1747 { return p ? p->version_ : String(); }
1749 String Device::vendorName() const
1750 { return p ? p->vendorName_ : String(); }
1752 int Device::vendorID() const
1753 { return p ? p->vendorID_ : 0; }
1755 String Device::OpenCL_C_Version() const
1756 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1758 String Device::OpenCLVersion() const
1759 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1761 int Device::deviceVersionMajor() const
1762 { return p ? p->deviceVersionMajor_ : 0; }
1764 int Device::deviceVersionMinor() const
1765 { return p ? p->deviceVersionMinor_ : 0; }
1767 String Device::driverVersion() const
1768 { return p ? p->driverVersion_ : String(); }
1770 int Device::type() const
1771 { return p ? p->type_ : 0; }
1773 int Device::addressBits() const
1774 { return p ? p->addressBits_ : 0; }
1776 bool Device::available() const
1777 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1779 bool Device::compilerAvailable() const
1780 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1782 bool Device::linkerAvailable() const
1783 #ifdef CL_VERSION_1_2
1784 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1786 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1789 int Device::doubleFPConfig() const
1790 { return p ? p->doubleFPConfig_ : 0; }
1792 int Device::singleFPConfig() const
1793 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1795 int Device::halfFPConfig() const
1796 #ifdef CL_VERSION_1_2
1797 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1799 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1802 bool Device::endianLittle() const
1803 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1805 bool Device::errorCorrectionSupport() const
1806 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1808 int Device::executionCapabilities() const
1809 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1811 size_t Device::globalMemCacheSize() const
1812 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1814 int Device::globalMemCacheType() const
1815 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1817 int Device::globalMemCacheLineSize() const
1818 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1820 size_t Device::globalMemSize() const
1821 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1823 size_t Device::localMemSize() const
1824 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1826 int Device::localMemType() const
1827 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1829 bool Device::hostUnifiedMemory() const
1830 { return p ? p->hostUnifiedMemory_ : false; }
1832 bool Device::imageSupport() const
1833 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1835 bool Device::imageFromBufferSupport() const
1837 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1840 uint Device::imagePitchAlignment() const
1842 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1843 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1849 uint Device::imageBaseAddressAlignment() const
1851 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1852 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1858 size_t Device::image2DMaxWidth() const
1859 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1861 size_t Device::image2DMaxHeight() const
1862 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1864 size_t Device::image3DMaxWidth() const
1865 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1867 size_t Device::image3DMaxHeight() const
1868 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1870 size_t Device::image3DMaxDepth() const
1871 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1873 size_t Device::imageMaxBufferSize() const
1874 #ifdef CL_VERSION_1_2
1875 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1877 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1880 size_t Device::imageMaxArraySize() const
1881 #ifdef CL_VERSION_1_2
1882 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1884 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1887 bool Device::intelSubgroupsSupport() const
1888 { return p ? p->intelSubgroupsSupport_ : false; }
1890 int Device::maxClockFrequency() const
1891 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1893 int Device::maxComputeUnits() const
1894 { return p ? p->maxComputeUnits_ : 0; }
1896 int Device::maxConstantArgs() const
1897 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1899 size_t Device::maxConstantBufferSize() const
1900 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1902 size_t Device::maxMemAllocSize() const
1903 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1905 size_t Device::maxParameterSize() const
1906 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1908 int Device::maxReadImageArgs() const
1909 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1911 int Device::maxWriteImageArgs() const
1912 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1914 int Device::maxSamplers() const
1915 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1917 size_t Device::maxWorkGroupSize() const
1918 { return p ? p->maxWorkGroupSize_ : 0; }
1920 int Device::maxWorkItemDims() const
1921 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1923 void Device::maxWorkItemSizes(size_t* sizes) const
1927 const int MAX_DIMS = 32;
1929 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1930 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1934 int Device::memBaseAddrAlign() const
1935 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1937 int Device::nativeVectorWidthChar() const
1938 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1940 int Device::nativeVectorWidthShort() const
1941 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1943 int Device::nativeVectorWidthInt() const
1944 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1946 int Device::nativeVectorWidthLong() const
1947 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1949 int Device::nativeVectorWidthFloat() const
1950 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1952 int Device::nativeVectorWidthDouble() const
1953 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1955 int Device::nativeVectorWidthHalf() const
1956 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1958 int Device::preferredVectorWidthChar() const
1959 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1961 int Device::preferredVectorWidthShort() const
1962 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1964 int Device::preferredVectorWidthInt() const
1965 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1967 int Device::preferredVectorWidthLong() const
1968 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1970 int Device::preferredVectorWidthFloat() const
1971 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1973 int Device::preferredVectorWidthDouble() const
1974 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1976 int Device::preferredVectorWidthHalf() const
1977 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1979 size_t Device::printfBufferSize() const
1980 #ifdef CL_VERSION_1_2
1981 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1983 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1987 size_t Device::profilingTimerResolution() const
1988 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1990 const Device& Device::getDefault()
1992 auto& c = OpenCLExecutionContext::getCurrent();
1995 return c.getDevice();
1998 static Device dummy;
2002 ////////////////////////////////////// Context ///////////////////////////////////////////////////
2004 template <typename Functor, typename ObjectType>
2005 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
2008 cl_int err = f(obj, name, 0, NULL, &required);
2009 if (err != CL_SUCCESS)
2015 AutoBuffer<char> buf(required + 1);
2016 char* ptr = buf.data(); // cleanup is not needed
2017 err = f(obj, name, required, ptr, NULL);
2018 if (err != CL_SUCCESS)
2026 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
2031 std::istringstream ss(s);
2035 std::getline(ss, item, delim);
2036 elems.push_back(item);
2040 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
2042 // Sample: AMD:GPU:Tahiti
2043 // Sample: :GPU|CPU: = '' = ':' = '::'
2044 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
2045 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
2047 std::vector<std::string> parts;
2048 split(configurationStr, ':', parts);
2049 if (parts.size() > 3)
2051 CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
2054 if (parts.size() > 2)
2055 deviceNameOrID = parts[2];
2056 if (parts.size() > 1)
2058 split(parts[1], '|', deviceTypes);
2060 if (parts.size() > 0)
2062 platform = parts[0];
2067 #if defined WINRT || defined _WIN32_WCE
2068 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2070 CV_UNUSED(configuration)
2074 static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
2076 std::string platform, deviceName;
2077 std::vector<std::string> deviceTypes;
2080 configuration = getenv("OPENCV_OPENCL_DEVICE");
2082 if (configuration &&
2083 (strcmp(configuration, "disabled") == 0 ||
2084 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
2090 if (deviceName.length() == 1)
2091 // We limit ID range to 0..9, because we want to write:
2092 // - '2500' to mean i5-2500
2093 // - '8350' to mean AMD FX-8350
2094 // - '650' to mean GeForce 650
2095 // To extend ID range change condition to '> 0'
2098 for (size_t i = 0; i < deviceName.length(); i++)
2100 if (!isdigit(deviceName[i]))
2108 deviceID = atoi(deviceName.c_str());
2114 std::vector<cl_platform_id> platforms;
2116 cl_uint numPlatforms = 0;
2117 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
2119 if (numPlatforms == 0)
2121 platforms.resize((size_t)numPlatforms);
2122 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
2123 platforms.resize(numPlatforms);
2126 int selectedPlatform = -1;
2127 if (platform.length() > 0)
2129 for (size_t i = 0; i < platforms.size(); i++)
2132 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
2133 if (name.find(platform) != std::string::npos)
2135 selectedPlatform = (int)i;
2139 if (selectedPlatform == -1)
2141 CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
2145 if (deviceTypes.size() == 0)
2149 deviceTypes.push_back("GPU");
2151 deviceTypes.push_back("CPU");
2154 deviceTypes.push_back("ALL");
2156 for (size_t t = 0; t < deviceTypes.size(); t++)
2159 std::string tempStrDeviceType = deviceTypes[t];
2160 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), details::char_tolower);
2162 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2163 deviceType = Device::TYPE_GPU;
2164 else if (tempStrDeviceType == "cpu")
2165 deviceType = Device::TYPE_CPU;
2166 else if (tempStrDeviceType == "accelerator")
2167 deviceType = Device::TYPE_ACCELERATOR;
2168 else if (tempStrDeviceType == "all")
2169 deviceType = Device::TYPE_ALL;
2172 CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
2176 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
2177 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
2178 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
2182 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
2183 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2185 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
2189 size_t base = devices.size();
2190 devices.resize(base + count);
2191 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
2192 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
2194 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
2198 for (size_t i = (isID ? deviceID : 0);
2199 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
2203 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
2204 cl_bool useGPU = true;
2205 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
2207 cl_bool isIGPU = CL_FALSE;
2208 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
2209 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
2211 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
2213 // TODO check for OpenCL 1.1
2221 return NULL; // suppress messages on stderr
2223 std::ostringstream msg;
2224 msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
2225 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
2226 << " Device types:";
2227 for (size_t t = 0; t < deviceTypes.size(); t++)
2228 msg << ' ' << deviceTypes[t];
2230 msg << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
2232 CV_LOG_ERROR(NULL, msg.str());
2237 #ifdef HAVE_OPENCL_SVM
2240 enum AllocatorFlags { // don't use first 16 bits
2241 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
2242 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
2243 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
2244 OPENCL_SVM_BUFFER_MASK = 3 << 16,
2245 OPENCL_SVM_BUFFER_MAP = 4 << 16
2248 static bool checkForceSVMUmatUsage()
2250 static bool initialized = false;
2251 static bool force = false;
2254 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
2259 static bool checkDisableSVMUMatUsage()
2261 static bool initialized = false;
2262 static bool force = false;
2265 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
2270 static bool checkDisableSVM()
2272 static bool initialized = false;
2273 static bool force = false;
2276 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
2281 // see SVMCapabilities
2282 static unsigned int getSVMCapabilitiesMask()
2284 static bool initialized = false;
2285 static unsigned int mask = 0;
2288 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
2289 if (envValue == NULL)
2291 return ~0U; // all bits 1
2293 mask = atoi(envValue);
2301 static size_t getProgramCountLimit()
2303 static bool initialized = false;
2304 static size_t count = 0;
2307 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
2313 static int g_contextId = 0;
2315 class OpenCLBufferPoolImpl;
2316 class OpenCLSVMBufferPoolImpl;
2318 struct Context::Impl
2320 static Context::Impl* get(Context& context) { return context.p; }
2322 typedef std::deque<Context::Impl*> container_t;
2323 static container_t& getGlobalContainer()
2325 // never delete this container (Impl lifetime is greater due to TLS storage)
2326 static container_t* g_contexts = new container_t();
2331 Impl(const std::string& configuration_)
2333 , contextId(CV_XADD(&g_contextId, 1))
2334 , configuration(configuration_)
2339 #ifdef HAVE_OPENCL_SVM
2340 , svmInitialized(false)
2344 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
2346 cv::AutoLock lock(cv::getInitializationMutex());
2347 auto& container = getGlobalContainer();
2348 container.resize(std::max(container.size(), (size_t)contextId + 1));
2349 container[contextId] = this;
2355 if (!cv::__termination)
2360 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2365 directx::internal::deleteDirectXImpl(&p_directx_impl);
2370 cv::AutoLock lock(cv::getInitializationMutex());
2371 auto& container = getGlobalContainer();
2372 CV_CheckLT((size_t)contextId, container.size(), "");
2373 container[contextId] = NULL;
2377 void init_device_list()
2381 cl_uint ndevices = 0;
2382 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
2383 CV_Assert(ndevices > 0);
2385 cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
2386 size_t devices_ret_size = 0;
2387 CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
2388 CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
2391 for (unsigned i = 0; i < ndevices; i++)
2393 devices.emplace_back(Device::fromHandle(cl_devices[i]));
2397 void __init_buffer_pools(); // w/o synchronization
2398 void _init_buffer_pools() const
2402 cv::AutoLock lock(cv::getInitializationMutex());
2405 const_cast<Impl*>(this)->__init_buffer_pools();
2410 static Impl* findContext(const std::string& configuration)
2412 CV_TRACE_FUNCTION();
2413 cv::AutoLock lock(cv::getInitializationMutex());
2414 auto& container = getGlobalContainer();
2415 if (configuration.empty() && !container.empty())
2416 return container[0];
2417 for (auto it = container.begin(); it != container.end(); ++it)
2420 if (i && i->configuration == configuration)
2428 static Impl* findOrCreateContext(const std::string& configuration_)
2430 CV_TRACE_FUNCTION();
2431 std::string configuration = configuration_;
2432 if (configuration_.empty())
2434 const char* c = getenv("OPENCV_OPENCL_DEVICE");
2438 Impl* impl = findContext(configuration);
2441 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2446 cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
2450 impl = new Impl(configuration);
2453 impl->createFromDevice(d);
2466 static Impl* findOrCreateContext(cl_context h)
2468 CV_TRACE_FUNCTION();
2472 std::string configuration = cv::format("@ctx-%p", (void*)h);
2473 Impl* impl = findContext(configuration);
2476 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2481 impl = new Impl(configuration);
2484 CV_OCL_CHECK(clRetainContext(h));
2486 impl->init_device_list();
2496 static Impl* findOrCreateContext(const ocl::Device& device)
2498 CV_TRACE_FUNCTION();
2500 CV_Assert(!device.empty());
2501 cl_device_id d = (cl_device_id)device.ptr();
2504 std::string configuration = cv::format("@dev-%p", (void*)d);
2505 Impl* impl = findContext(configuration);
2508 CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
2513 impl = new Impl(configuration);
2516 impl->createFromDevice(d);
2517 CV_Assert(impl->handle);
2529 CV_TRACE_FUNCTION();
2530 cl_device_id d = selectOpenCLDevice();
2535 createFromDevice(d);
2538 void createFromDevice(cl_device_id d)
2540 CV_TRACE_FUNCTION();
2541 CV_Assert(handle == NULL);
2543 cl_platform_id pl = NULL;
2544 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
2546 cl_context_properties prop[] =
2548 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
2552 // !!! in the current implementation force the number of devices to 1 !!!
2556 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
2557 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
2559 bool ok = handle != 0 && status == CL_SUCCESS;
2569 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2571 void unloadProg(Program& prog)
2573 cv::AutoLock lock(program_cache_mutex);
2574 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2576 phash_t::iterator it = phash.find(*i);
2577 if (it != phash.end())
2579 if (it->second.ptr() == prog.ptr())
2589 std::string& getPrefixString()
2593 cv::AutoLock lock(program_cache_mutex);
2596 CV_Assert(!devices.empty());
2597 const Device& d = devices[0];
2598 int bits = d.addressBits();
2599 if (bits > 0 && bits != 64)
2600 prefix = cv::format("%d-bit--", bits);
2601 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2603 for (size_t i = 0; i < prefix.size(); i++)
2606 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2616 std::string& getPrefixBase()
2618 if (prefix_base.empty())
2620 cv::AutoLock lock(program_cache_mutex);
2621 if (prefix_base.empty())
2623 const Device& d = devices[0];
2624 int bits = d.addressBits();
2625 if (bits > 0 && bits != 64)
2626 prefix_base = cv::format("%d-bit--", bits);
2627 prefix_base += d.vendorName() + "--" + d.name() + "--";
2629 for (size_t i = 0; i < prefix_base.size(); i++)
2631 char c = prefix_base[i];
2632 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2634 prefix_base[i] = '_';
2642 IMPLEMENT_REFCOUNTABLE();
2644 const int contextId; // global unique ID
2645 const std::string configuration;
2648 std::vector<Device> devices;
2651 std::string prefix_base;
2653 cv::Mutex program_cache_mutex;
2654 typedef std::map<std::string, Program> phash_t;
2656 typedef std::list<cv::String> CacheList;
2657 CacheList cacheList;
2659 std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
2660 std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
2661 OpenCLBufferPoolImpl& getBufferPool() const
2663 _init_buffer_pools();
2664 CV_DbgAssert(bufferPool_);
2665 return *bufferPool_.get();
2667 OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
2669 _init_buffer_pools();
2670 CV_DbgAssert(bufferPoolHostPtr_);
2671 return *bufferPoolHostPtr_.get();
2675 directx::internal::OpenCLDirectXImpl* p_directx_impl;
2677 directx::internal::OpenCLDirectXImpl* getDirectXImpl()
2679 if (!p_directx_impl)
2681 p_directx_impl = directx::internal::createDirectXImpl();
2683 return p_directx_impl;
2687 #ifdef HAVE_OPENCL_SVM
2688 bool svmInitialized;
2691 svm::SVMCapabilities svmCapabilities;
2692 svm::SVMFunctions svmFunctions;
2696 CV_Assert(handle != NULL);
2697 const Device& device = devices[0];
2698 cl_device_svm_capabilities deviceCaps = 0;
2699 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2700 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2701 if (status != CL_SUCCESS)
2703 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2706 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2707 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2708 svmCapabilities.value_ =
2709 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2710 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2711 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2712 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2713 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2714 if (svmCapabilities.value_ == 0)
2716 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2722 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2723 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2726 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2727 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2732 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2733 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2735 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2736 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2741 ((int*)ptr)[0] = 100;
2745 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2748 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2750 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2751 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2756 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2761 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2762 clSVMFree(handle, ptr);
2765 clSVMFree(handle, ptr);
2766 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2767 svmFunctions.fn_clSVMFree = clSVMFree;
2768 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2769 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2770 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2771 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2772 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2773 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2774 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2778 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2781 // Try HSA extension
2782 String extensions = device.extensions();
2783 if (extensions.find("cl_amd_svm") == String::npos)
2785 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2788 cl_platform_id p = NULL;
2789 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2790 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2791 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2792 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2793 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2794 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2795 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2796 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2797 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2798 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2799 CV_Assert(svmFunctions.isValid());
2803 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2808 svmAvailable = true;
2809 svmEnabled = !svm::checkDisableSVM();
2810 svmInitialized = true;
2811 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2814 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2815 svmAvailable = false;
2817 svmCapabilities.value_ = 0;
2818 svmInitialized = true;
2819 svmFunctions.fn_clSVMAlloc = NULL;
2823 std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
2825 OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
2827 _init_buffer_pools();
2828 CV_DbgAssert(bufferPoolSVM_);
2829 return *bufferPoolSVM_.get();
2833 friend class Program;
2848 Context::Context(int dtype)
2854 void Context::release()
2863 bool Context::create()
2868 p = Impl::findOrCreateContext(std::string());
2876 bool Context::create(int dtype)
2881 if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
2883 p = Impl::findOrCreateContext("");
2885 else if (dtype == CL_DEVICE_TYPE_GPU)
2887 p = Impl::findOrCreateContext(":GPU:");
2889 else if (dtype == CL_DEVICE_TYPE_CPU)
2891 p = Impl::findOrCreateContext(":CPU:");
2895 CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
2897 if (p && !p->handle)
2904 Context::Context(const Context& c)
2911 Context& Context::operator = (const Context& c)
2913 Impl* newp = (Impl*)c.p;
2922 void* Context::ptr() const
2924 return p == NULL ? NULL : p->handle;
2927 size_t Context::ndevices() const
2929 return p ? p->devices.size() : 0;
2932 Device& Context::device(size_t idx) const
2934 static Device dummy;
2935 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2938 Context& Context::getDefault(bool initialize)
2940 auto& c = OpenCLExecutionContext::getCurrent();
2943 auto& ctx = c.getContext();
2947 CV_UNUSED(initialize);
2948 static Context dummy;
2952 Program Context::getProg(const ProgramSource& prog,
2953 const String& buildopts, String& errmsg)
2955 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2958 void Context::unloadProg(Program& prog)
2961 p->unloadProg(prog);
2965 Context Context::fromHandle(void* context)
2968 ctx.p = Impl::findOrCreateContext((cl_context)context);
2973 Context Context::fromDevice(const ocl::Device& device)
2976 ctx.p = Impl::findOrCreateContext(device);
2981 Context Context::create(const std::string& configuration)
2984 ctx.p = Impl::findOrCreateContext(configuration);
2988 #ifdef HAVE_OPENCL_SVM
2989 bool Context::useSVM() const
2991 Context::Impl* i = p;
2993 if (!i->svmInitialized)
2995 return i->svmEnabled;
2997 void Context::setUseSVM(bool enabled)
2999 Context::Impl* i = p;
3001 if (!i->svmInitialized)
3003 if (enabled && !i->svmAvailable)
3005 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
3007 i->svmEnabled = enabled;
3010 bool Context::useSVM() const { return false; }
3011 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
3014 #ifdef HAVE_OPENCL_SVM
3017 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
3019 Context::Impl* i = context.p;
3021 if (!i->svmInitialized)
3023 return i->svmCapabilities;
3026 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
3028 Context::Impl* i = context.p;
3030 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
3031 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
3032 return &i->svmFunctions;
3035 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
3037 if (checkForceSVMUmatUsage())
3039 if (checkDisableSVMUMatUsage())
3041 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
3043 return false; // don't use SVM by default
3046 } // namespace cv::ocl::svm
3047 #endif // HAVE_OPENCL_SVM
3050 static void get_platform_name(cl_platform_id id, String& name)
3052 // get platform name string length
3054 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
3056 // get platform name string
3057 AutoBuffer<char> buf(sz + 1);
3058 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
3060 // just in case, ensure trailing zero for ASCIIZ string
3067 // Attaches OpenCL context to OpenCV
3069 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
3071 auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3076 OpenCLExecutionContext OpenCLExecutionContext::create(
3077 const std::string& platformName, void* platformID, void* context, void* deviceID
3081 CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
3084 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
3087 CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
3089 std::vector<cl_platform_id> platforms(cnt);
3091 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
3093 bool platformAvailable = false;
3095 // check if external platformName contained in list of available platforms in OpenCV
3096 for (unsigned int i = 0; i < cnt; i++)
3098 String availablePlatformName;
3099 get_platform_name(platforms[i], availablePlatformName);
3100 // external platform is found in the list of available platforms
3101 if (platformName == availablePlatformName)
3103 platformAvailable = true;
3108 if (!platformAvailable)
3109 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3111 // check if platformID corresponds to platformName
3112 String actualPlatformName;
3113 get_platform_name((cl_platform_id)platformID, actualPlatformName);
3114 if (platformName != actualPlatformName)
3115 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
3117 OpenCLExecutionContext ctx;
3118 ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
3119 CV_OCL_CHECK(clReleaseContext((cl_context)context));
3120 CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
3124 void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
3126 // internal call, less checks
3127 cl_platform_id platformID = (cl_platform_id)_platform;
3128 cl_context context = (cl_context)_context;
3129 cl_device_id deviceID = (cl_device_id)_device;
3131 std::string platformName = PlatformInfo(&platformID).name();
3133 auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
3134 CV_Assert(!clExecCtx.empty());
3135 ctx = clExecCtx.getContext();
3138 /////////////////////////////////////////// Queue /////////////////////////////////////////////
3142 inline void __init()
3146 isProfilingQueue_ = false;
3149 Impl(cl_command_queue q)
3154 cl_command_queue_properties props = 0;
3155 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
3156 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
3159 Impl(cl_command_queue q, bool isProfilingQueue)
3163 isProfilingQueue_ = isProfilingQueue;
3166 Impl(const Context& c, const Device& d, bool withProfiling = false)
3170 const Context* pc = &c;
3171 cl_context ch = (cl_context)pc->ptr();
3174 pc = &Context::getDefault();
3175 ch = (cl_context)pc->ptr();
3177 cl_device_id dh = (cl_device_id)d.ptr();
3179 dh = (cl_device_id)pc->device(0).ptr();
3181 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
3182 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
3183 isProfilingQueue_ = withProfiling;
3189 if (!cv::__termination)
3194 CV_OCL_DBG_CHECK(clFinish(handle));
3195 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
3201 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
3203 if (isProfilingQueue_)
3206 if (profiling_queue_.ptr())
3207 return profiling_queue_;
3210 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
3212 cl_device_id device = 0;
3213 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
3215 cl_int result = CL_SUCCESS;
3216 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
3217 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
3218 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
3221 queue.p = new Impl(q, true);
3222 profiling_queue_ = queue;
3224 return profiling_queue_;
3227 IMPLEMENT_REFCOUNTABLE();
3229 cl_command_queue handle;
3230 bool isProfilingQueue_;
3231 cv::ocl::Queue profiling_queue_;
3239 Queue::Queue(const Context& c, const Device& d)
3245 Queue::Queue(const Queue& q)
3252 Queue& Queue::operator = (const Queue& q)
3254 Impl* newp = (Impl*)q.p;
3269 bool Queue::create(const Context& c, const Device& d)
3274 return p->handle != 0;
3277 void Queue::finish()
3281 CV_OCL_DBG_CHECK(clFinish(p->handle));
3285 const Queue& Queue::getProfilingQueue() const
3288 return p->getProfilingQueue(*this);
3291 void* Queue::ptr() const
3293 return p ? p->handle : 0;
3296 Queue& Queue::getDefault()
3298 auto& c = OpenCLExecutionContext::getCurrent();
3301 auto& q = c.getQueue();
3308 static cl_command_queue getQueue(const Queue& q)
3310 cl_command_queue qq = (cl_command_queue)q.ptr();
3312 qq = (cl_command_queue)Queue::getDefault().ptr();
3316 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
3318 KernelArg::KernelArg()
3319 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
3323 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
3324 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
3326 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
3329 KernelArg KernelArg::Constant(const Mat& m)
3331 CV_Assert(m.isContinuous());
3332 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
3335 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
3339 Impl(const char* kname, const Program& prog) :
3340 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
3342 cl_program ph = (cl_program)prog.ptr();
3347 handle = clCreateKernel(ph, kname, &retval);
3348 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
3350 for( int i = 0; i < MAX_ARRS; i++ )
3352 haveTempDstUMats = false;
3353 haveTempSrcUMats = false;
3358 for( int i = 0; i < MAX_ARRS; i++ )
3361 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
3363 u[i]->flags |= UMatData::ASYNC_CLEANUP;
3364 u[i]->currAllocator->deallocate(u[i]);
3369 haveTempDstUMats = false;
3370 haveTempSrcUMats = false;
3373 void addUMat(const UMat& m, bool dst)
3375 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
3377 CV_XADD(&m.u->urefcount, 1);
3379 if(dst && m.u->tempUMat())
3380 haveTempDstUMats = true;
3381 if(m.u->originalUMatData == NULL && m.u->tempUMat())
3382 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
3385 void addImage(const Image2D& image)
3387 images.push_back(image);
3390 void finit(cl_event e)
3395 isInProgress = false;
3399 bool run(int dims, size_t _globalsize[], size_t _localsize[],
3400 bool sync, int64* timeNS, const Queue& q);
3406 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
3410 IMPLEMENT_REFCOUNTABLE();
3414 enum { MAX_ARRS = 16 };
3415 UMatData* u[MAX_ARRS];
3417 bool isAsyncRun; // true if kernel was scheduled in async mode
3419 std::list<Image2D> images;
3420 bool haveTempDstUMats;
3421 bool haveTempSrcUMats;
3424 }} // namespace cv::ocl
3428 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
3432 ((cv::ocl::Kernel::Impl*)p)->finit(e);
3434 catch (const cv::Exception& exc)
3436 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
3438 catch (const std::exception& exc)
3440 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
3444 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
3450 namespace cv { namespace ocl {
3457 Kernel::Kernel(const char* kname, const Program& prog)
3460 create(kname, prog);
3463 Kernel::Kernel(const char* kname, const ProgramSource& src,
3464 const String& buildopts, String* errmsg)
3467 create(kname, src, buildopts, errmsg);
3470 Kernel::Kernel(const Kernel& k)
3477 Kernel& Kernel::operator = (const Kernel& k)
3479 Impl* newp = (Impl*)k.p;
3494 bool Kernel::create(const char* kname, const Program& prog)
3498 p = new Impl(kname, prog);
3504 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
3510 bool Kernel::create(const char* kname, const ProgramSource& src,
3511 const String& buildopts, String* errmsg)
3519 if( !errmsg ) errmsg = &tempmsg;
3520 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
3521 return create(kname, prog);
3524 void* Kernel::ptr() const
3526 return p ? p->handle : 0;
3529 bool Kernel::empty() const
3534 static cv::String dumpValue(size_t sz, const void* p)
3537 return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
3539 return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
3540 return cv::format("%p", p);
3543 int Kernel::set(int i, const void* value, size_t sz)
3545 if (!p || !p->handle)
3552 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
3553 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
3554 if (retval != CL_SUCCESS)
3559 int Kernel::set(int i, const Image2D& image2D)
3561 p->addImage(image2D);
3562 cl_mem h = (cl_mem)image2D.ptr();
3563 return set(i, &h, sizeof(h));
3566 int Kernel::set(int i, const UMat& m)
3568 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
3571 int Kernel::set(int i, const KernelArg& arg)
3573 if( !p || !p->handle )
3577 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
3578 p->name.c_str(), (int)i));
3586 AccessFlag accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : static_cast<AccessFlag>(0)) |
3587 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : static_cast<AccessFlag>(0));
3588 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3589 if (ptronly && arg.m->empty())
3591 cl_mem h_null = (cl_mem)NULL;
3592 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3593 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3596 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3600 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3601 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3607 #ifdef HAVE_OPENCL_SVM
3608 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3610 const Context& ctx = Context::getDefault();
3611 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3612 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3613 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3615 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3617 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3619 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3624 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3625 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3632 else if( arg.m->dims <= 2 )
3635 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3636 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3637 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3638 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3641 if( !(arg.flags & KernelArg::NO_SIZE) )
3643 int cols = u2d.cols*arg.wscale/arg.iwscale;
3644 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3645 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3646 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3647 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3654 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3655 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3656 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3657 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3658 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3659 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3661 if( !(arg.flags & KernelArg::NO_SIZE) )
3663 int cols = u3d.cols*arg.wscale/arg.iwscale;
3664 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3665 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3666 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3667 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3668 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3669 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3673 p->addUMat(*arg.m, !!(accessFlags & ACCESS_WRITE));
3676 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3677 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3681 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3682 bool sync, const Queue& q)
3687 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3689 CV_Assert(_globalsize != NULL);
3690 for (int i = 0; i < dims; i++)
3692 size_t val = _localsize ? _localsize[i] :
3693 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3694 CV_Assert( val > 0 );
3695 total *= _globalsize[i];
3696 if (_globalsize[i] == 1 && !_localsize)
3698 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3700 CV_Assert(total > 0);
3702 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3706 static bool isRaiseErrorOnReuseAsyncKernel()
3708 static bool initialized = false;
3709 static bool value = false;
3712 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3718 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3719 bool sync, int64* timeNS, const Queue& q)
3721 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3725 CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3731 CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3732 if (isRaiseErrorOnReuseAsyncKernel())
3734 return false; // OpenCV 5.0: raise error
3740 CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3741 if (isRaiseErrorOnReuseAsyncKernel())
3743 return false; // OpenCV 5.0: raise error
3746 cl_command_queue qq = getQueue(q);
3747 if (haveTempDstUMats)
3749 if (haveTempSrcUMats)
3753 cl_event asyncEvent = 0;
3754 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3755 NULL, globalsize, localsize, 0, 0,
3756 (sync && !timeNS) ? 0 : &asyncEvent);
3757 #if !CV_OPENCL_SHOW_RUN_KERNELS
3758 if (retval != CL_SUCCESS)
3761 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%zux%zux%zu, localsize=%s) sync=%s", name.c_str(), (int)dims,
3762 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3763 (localsize ? cv::format("%zux%zux%zu", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3764 sync ? "true" : "false"
3766 if (retval != CL_SUCCESS)
3768 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3770 #if CV_OPENCL_TRACE_CHECK
3771 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3773 printf("%s\n", msg.c_str());
3777 if (sync || retval != CL_SUCCESS)
3779 CV_OCL_DBG_CHECK(clFinish(qq));
3782 if (retval == CL_SUCCESS)
3784 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3785 cl_ulong startTime, stopTime;
3786 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3787 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3788 *timeNS = (int64)(stopTime - startTime);
3800 isInProgress = true;
3801 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3804 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3805 return retval == CL_SUCCESS;
3808 bool Kernel::runTask(bool sync, const Queue& q)
3810 if(!p || !p->handle || p->isInProgress)
3813 cl_command_queue qq = getQueue(q);
3814 cl_event asyncEvent = 0;
3815 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3816 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3817 if (sync || retval != CL_SUCCESS)
3819 CV_OCL_DBG_CHECK(clFinish(qq));
3825 p->isInProgress = true;
3826 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3829 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3830 return retval == CL_SUCCESS;
3833 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3835 CV_Assert(p && p->handle && !p->isInProgress);
3836 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3838 q.finish(); // call clFinish() on base queue
3839 Queue profilingQueue = q.getProfilingQueue();
3841 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3842 return res ? timeNs : -1;
3845 size_t Kernel::workGroupSize() const
3847 if(!p || !p->handle)
3849 size_t val = 0, retsz = 0;
3850 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3851 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3852 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3853 return status == CL_SUCCESS ? val : 0;
3856 size_t Kernel::preferedWorkGroupSizeMultiple() const
3858 if(!p || !p->handle)
3860 size_t val = 0, retsz = 0;
3861 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3862 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3863 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3864 return status == CL_SUCCESS ? val : 0;
3867 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3869 if(!p || !p->handle || !wsz)
3872 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3873 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3874 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3875 return status == CL_SUCCESS;
3878 size_t Kernel::localMemSize() const
3880 if(!p || !p->handle)
3884 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3885 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3886 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3887 return status == CL_SUCCESS ? (size_t)val : 0;
3892 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3894 struct ProgramSource::Impl
3896 IMPLEMENT_REFCOUNTABLE();
3899 PROGRAM_SOURCE_CODE = 0,
3905 Impl(const String& src)
3907 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3908 initFromSource(src, cv::String());
3910 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3912 init(PROGRAM_SOURCE_CODE, module, name);
3913 initFromSource(codeStr, codeHash);
3917 void init(enum KIND kind, const String& module, const String& name)
3926 isHashUpdated = false;
3929 void initFromSource(const String& codeStr, const String& codeHash)
3932 sourceHash_ = codeHash;
3933 if (sourceHash_.empty())
3939 isHashUpdated = true;
3943 void updateHash(const char* hashStr = NULL)
3947 sourceHash_ = cv::String(hashStr);
3948 isHashUpdated = true;
3954 case PROGRAM_SOURCE_CODE:
3957 CV_Assert(codeStr_.empty());
3958 hash = crc64(sourceAddr_, sourceSize_); // static storage
3962 CV_Assert(!codeStr_.empty());
3963 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3966 case PROGRAM_BINARIES:
3969 hash = crc64(sourceAddr_, sourceSize_);
3972 CV_Error(Error::StsInternal, "Internal error");
3974 sourceHash_ = cv::format("%08jx", (uintmax_t)hash);
3975 isHashUpdated = true;
3978 Impl(enum KIND kind,
3979 const String& module, const String& name,
3980 const unsigned char* binary, const size_t size,
3981 const cv::String& buildOptions = cv::String())
3983 init(kind, module, name);
3985 sourceAddr_ = binary;
3988 buildOptions_ = buildOptions;
3991 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3992 const char* sourceCodeStaticStr, const char* hashStaticStr,
3993 const cv::String& buildOptions)
3995 ProgramSource result;
3996 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3997 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3998 result.p->updateHash(hashStaticStr);
4002 static ProgramSource fromBinary(const String& module, const String& name,
4003 const unsigned char* binary, const size_t size,
4004 const cv::String& buildOptions)
4006 ProgramSource result;
4007 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
4011 static ProgramSource fromSPIR(const String& module, const String& name,
4012 const unsigned char* binary, const size_t size,
4013 const cv::String& buildOptions)
4015 ProgramSource result;
4016 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
4023 // TODO std::vector<ProgramSource> includes_;
4024 String codeStr_; // PROGRAM_SOURCE_CODE only
4026 const unsigned char* sourceAddr_;
4029 cv::String buildOptions_;
4034 friend struct Program::Impl;
4035 friend struct internal::ProgramEntry;
4036 friend struct Context::Impl;
4040 ProgramSource::ProgramSource()
4045 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
4047 p = new Impl(module, name, codeStr, codeHash);
4050 ProgramSource::ProgramSource(const char* prog)
4055 ProgramSource::ProgramSource(const String& prog)
4060 ProgramSource::~ProgramSource()
4066 ProgramSource::ProgramSource(const ProgramSource& prog)
4073 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
4075 Impl* newp = (Impl*)prog.p;
4084 const String& ProgramSource::source() const
4087 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
4088 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
4092 ProgramSource::hash_t ProgramSource::hash() const
4094 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
4097 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
4098 const unsigned char* binary, const size_t size,
4099 const cv::String& buildOptions)
4102 CV_Assert(size > 0);
4103 return Impl::fromBinary(module, name, binary, size, buildOptions);
4106 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
4107 const unsigned char* binary, const size_t size,
4108 const cv::String& buildOptions)
4111 CV_Assert(size > 0);
4112 return Impl::fromBinary(module, name, binary, size, buildOptions);
4116 internal::ProgramEntry::operator ProgramSource&() const
4118 if (this->pProgramSource == NULL)
4120 cv::AutoLock lock(cv::getInitializationMutex());
4121 if (this->pProgramSource == NULL)
4123 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
4124 ProgramSource* ptr = new ProgramSource(ps);
4125 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
4128 return *this->pProgramSource;
4133 /////////////////////////////////////////// Program /////////////////////////////////////////////
4136 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
4144 return a + (cv::String(" ") + b);
4147 struct Program::Impl
4149 IMPLEMENT_REFCOUNTABLE();
4151 Impl(const ProgramSource& src,
4152 const String& _buildflags, String& errmsg) :
4155 buildflags(_buildflags)
4157 const ProgramSource::Impl* src_ = src.getImpl();
4159 sourceModule_ = src_->module_;
4160 sourceName_ = src_->name_;
4161 const Context ctx = Context::getDefault();
4162 Device device = ctx.device(0);
4163 if (ctx.ptr() == NULL || device.ptr() == NULL)
4165 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
4166 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4169 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
4170 else if (device.isIntel())
4171 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
4172 const String param_buildExtraOptions = getBuildExtraOptions();
4173 if (!param_buildExtraOptions.empty())
4174 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
4176 compile(ctx, src_, errmsg);
4179 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4181 CV_Assert(ctx.getImpl());
4184 // We don't cache OpenCL binaries
4185 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
4187 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4188 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4191 return compileWithCache(ctx, src_, errmsg);
4194 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4196 CV_Assert(ctx.getImpl());
4198 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
4200 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4201 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
4202 const std::string base_dir = config.prepareCacheDirectoryForContext(
4203 ctx.getImpl()->getPrefixString(),
4204 ctx.getImpl()->getPrefixBase()
4206 const String& hash_str = src_->sourceHash_;
4208 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
4210 CV_Assert(!hash_str.empty());
4211 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
4212 fname = utils::fs::join(base_dir, fname);
4214 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
4215 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
4219 std::vector<char> binaryBuf;
4222 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4223 BinaryProgramFile file(fname, hash_str.c_str());
4224 res = file.read(buildflags, binaryBuf);
4228 CV_Assert(!binaryBuf.empty());
4229 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
4230 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
4235 catch (const cv::Exception& e)
4238 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
4242 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
4245 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4246 CV_Assert(handle == NULL);
4247 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
4249 if (!buildFromSources(ctx, src_, errmsg))
4254 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
4256 buildflags = joinBuildOptions(buildflags, " -x spir");
4257 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
4259 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
4261 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
4262 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
4266 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
4268 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
4272 CV_Error(Error::StsInternal, "Internal error");
4274 CV_Assert(handle != NULL);
4275 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
4276 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
4280 std::vector<char> binaryBuf;
4281 getProgramBinary(binaryBuf);
4283 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
4284 BinaryProgramFile file(fname, hash_str.c_str());
4285 file.write(buildflags, binaryBuf);
4288 catch (const cv::Exception& e)
4290 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
4294 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
4297 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
4298 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4299 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4301 std::vector<char> binaryBuf;
4302 getProgramBinary(binaryBuf);
4303 if (!binaryBuf.empty())
4305 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4307 createFromBinary(ctx, binaryBuf, errmsg);
4311 return handle != NULL;
4314 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
4316 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
4319 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4320 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
4321 if (log_retval == CL_SUCCESS && retsz > 1)
4323 buffer.resize(retsz + 16);
4324 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
4325 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
4326 if (log_retval == CL_SUCCESS)
4328 if (retsz < buffer.size())
4331 buffer[buffer.size() - 1] = 0;
4339 errmsg = String(buffer.data());
4340 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
4341 sourceModule_.c_str(), sourceName_.c_str(),
4342 result, getOpenCLErrorString(result),
4343 buildflags.c_str(), errmsg.c_str());
4347 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
4350 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
4351 CV_Assert(handle == NULL);
4352 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
4353 sourceModule_.c_str(), sourceName_.c_str(),
4354 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
4356 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
4358 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
4359 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
4360 CV_Assert(srcptr != NULL);
4361 CV_Assert(srclen > 0);
4365 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
4366 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
4367 CV_Assert(handle || retval != CL_SUCCESS);
4368 if (handle && retval == CL_SUCCESS)
4370 size_t n = ctx.ndevices();
4371 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
4372 cl_device_id* deviceList = deviceListBuf.data();
4373 for (size_t i = 0; i < n; i++)
4375 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
4378 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
4379 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
4380 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
4381 if (retval != CL_SUCCESS)
4384 dumpBuildLog_(retval, deviceList, errmsg);
4386 // don't remove "retval != CL_SUCCESS" condition here:
4387 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
4388 if (retval != CL_SUCCESS && handle)
4390 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4394 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4395 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4397 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
4399 char kernels_buffer[4096] = {0};
4400 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4401 if (retsz < sizeof(kernels_buffer))
4402 kernels_buffer[retsz] = 0;
4404 kernels_buffer[0] = 0;
4405 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4410 return handle != NULL;
4413 void getProgramBinary(std::vector<char>& buf)
4417 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
4419 uchar* ptr = (uchar*)&buf[0];
4420 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
4423 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
4425 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
4428 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
4430 CV_Assert(handle == NULL);
4431 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
4432 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
4434 CV_Assert(binarySize > 0);
4436 size_t ndevices = (int)ctx.ndevices();
4437 AutoBuffer<cl_device_id> devices_(ndevices);
4438 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
4439 AutoBuffer<size_t> binarySizes_(ndevices);
4441 cl_device_id* devices = devices_.data();
4442 const uchar** binaryPtrs = binaryPtrs_.data();
4443 size_t* binarySizes = binarySizes_.data();
4444 for (size_t i = 0; i < ndevices; i++)
4446 devices[i] = (cl_device_id)ctx.device(i).ptr();
4447 binaryPtrs[i] = binaryAddr;
4448 binarySizes[i] = binarySize;
4452 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
4453 binarySizes, binaryPtrs, NULL, &result);
4454 if (result != CL_SUCCESS)
4456 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
4459 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4467 // call clBuildProgram()
4469 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
4470 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
4471 if (result != CL_SUCCESS)
4473 dumpBuildLog_(result, devices, errmsg);
4476 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4482 // check build status
4484 cl_build_status build_status = CL_BUILD_NONE;
4486 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
4487 sizeof(build_status), &build_status, &retsz));
4488 if (result == CL_SUCCESS)
4490 if (build_status == CL_BUILD_SUCCESS)
4496 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
4502 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
4505 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
4510 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
4511 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
4513 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
4515 char kernels_buffer[4096] = {0};
4516 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
4517 if (retsz < sizeof(kernels_buffer))
4518 kernels_buffer[retsz] = 0;
4520 kernels_buffer[0] = 0;
4521 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
4524 return handle != NULL;
4532 if (!cv::__termination)
4535 clReleaseProgram(handle);
4544 String sourceModule_;
4549 Program::Program() { p = 0; }
4551 Program::Program(const ProgramSource& src,
4552 const String& buildflags, String& errmsg)
4555 create(src, buildflags, errmsg);
4558 Program::Program(const Program& prog)
4565 Program& Program::operator = (const Program& prog)
4567 Impl* newp = (Impl*)prog.p;
4582 bool Program::create(const ProgramSource& src,
4583 const String& buildflags, String& errmsg)
4590 p = new Impl(src, buildflags, errmsg);
4599 void* Program::ptr() const
4601 return p ? p->handle : 0;
4604 #ifndef OPENCV_REMOVE_DEPRECATED_API
4605 const ProgramSource& Program::source() const
4607 CV_Error(Error::StsNotImplemented, "Removed API");
4610 bool Program::read(const String& bin, const String& buildflags)
4612 CV_UNUSED(bin); CV_UNUSED(buildflags);
4613 CV_Error(Error::StsNotImplemented, "Removed API");
4616 bool Program::write(String& bin) const
4619 CV_Error(Error::StsNotImplemented, "Removed API");
4622 String Program::getPrefix() const
4626 Context::Impl* ctx_ = Context::getDefault().getImpl();
4628 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4631 String Program::getPrefix(const String& buildflags)
4633 Context::Impl* ctx_ = Context::getDefault().getImpl();
4635 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4637 #endif // OPENCV_REMOVE_DEPRECATED_API
4639 void Program::getBinary(std::vector<char>& binary) const
4641 CV_Assert(p && "Empty program");
4642 p->getProgramBinary(binary);
4645 Program Context::Impl::getProg(const ProgramSource& src,
4646 const String& buildflags, String& errmsg)
4648 size_t limit = getProgramCountLimit();
4649 const ProgramSource::Impl* src_ = src.getImpl();
4651 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4652 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4653 getPrefixString().c_str(),
4654 buildflags.c_str());
4656 cv::AutoLock lock(program_cache_mutex);
4657 phash_t::iterator it = phash.find(key);
4658 if (it != phash.end())
4661 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4662 if (i != cacheList.end() && i != cacheList.begin())
4665 cacheList.push_front(key);
4669 { // cleanup program cache
4670 size_t sz = phash.size();
4671 if (limit > 0 && sz >= limit)
4673 static bool warningFlag = false;
4676 printf("\nWARNING: OpenCV-OpenCL:\n"
4677 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4678 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4681 while (!cacheList.empty())
4683 size_t c = phash.erase(cacheList.back());
4684 cacheList.pop_back();
4691 Program prog(src, buildflags, errmsg);
4692 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4694 cv::AutoLock lock(program_cache_mutex);
4695 phash.insert(std::pair<std::string, Program>(key, prog));
4696 cacheList.push_front(key);
4702 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4704 template<typename T>
4705 class OpenCLBufferPool
4708 ~OpenCLBufferPool() { }
4710 virtual T allocate(size_t size) = 0;
4711 virtual void release(T buffer) = 0;
4714 template <typename Derived, typename BufferEntry, typename T>
4715 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4718 inline Derived& derived() { return *static_cast<Derived*>(this); }
4722 size_t currentReservedSize;
4723 size_t maxReservedSize;
4725 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4726 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4729 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4731 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4732 for (; i != allocatedEntries_.end(); ++i)
4734 BufferEntry& e = *i;
4735 if (e.clBuffer_ == buffer)
4738 allocatedEntries_.erase(i);
4746 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4748 if (reservedEntries_.empty())
4750 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4751 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4753 size_t minDiff = (size_t)(-1);
4754 for (; i != reservedEntries_.end(); ++i)
4756 BufferEntry& e = *i;
4757 if (e.capacity_ >= size)
4759 size_t diff = e.capacity_ - size;
4760 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4770 if (result_pos != reservedEntries_.end())
4772 //CV_DbgAssert(result == *result_pos);
4773 reservedEntries_.erase(result_pos);
4775 currentReservedSize -= entry.capacity_;
4776 allocatedEntries_.push_back(entry);
4783 void _checkSizeOfReservedEntries()
4785 while (currentReservedSize > maxReservedSize)
4787 CV_DbgAssert(!reservedEntries_.empty());
4788 const BufferEntry& entry = reservedEntries_.back();
4789 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4790 currentReservedSize -= entry.capacity_;
4791 derived()._releaseBufferEntry(entry);
4792 reservedEntries_.pop_back();
4796 inline size_t _allocationGranularity(size_t size)
4799 if (size < 1024*1024)
4800 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4801 else if (size < 16*1024*1024)
4808 OpenCLBufferPoolBaseImpl()
4809 : currentReservedSize(0),
4814 virtual ~OpenCLBufferPoolBaseImpl()
4816 freeAllReservedBuffers();
4817 CV_Assert(reservedEntries_.empty());
4820 virtual T allocate(size_t size) CV_OVERRIDE
4822 AutoLock locker(mutex_);
4824 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4826 CV_DbgAssert(size <= entry.capacity_);
4827 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4831 derived()._allocateBufferEntry(entry, size);
4833 return entry.clBuffer_;
4835 virtual void release(T buffer) CV_OVERRIDE
4837 AutoLock locker(mutex_);
4839 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4840 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4842 derived()._releaseBufferEntry(entry);
4846 reservedEntries_.push_front(entry);
4847 currentReservedSize += entry.capacity_;
4848 _checkSizeOfReservedEntries();
4852 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4853 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4854 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4856 AutoLock locker(mutex_);
4857 size_t oldMaxReservedSize = maxReservedSize;
4858 maxReservedSize = size;
4859 if (maxReservedSize < oldMaxReservedSize)
4861 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4862 for (; i != reservedEntries_.end();)
4864 const BufferEntry& entry = *i;
4865 if (entry.capacity_ > maxReservedSize / 8)
4867 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4868 currentReservedSize -= entry.capacity_;
4869 derived()._releaseBufferEntry(entry);
4870 i = reservedEntries_.erase(i);
4875 _checkSizeOfReservedEntries();
4878 virtual void freeAllReservedBuffers() CV_OVERRIDE
4880 AutoLock locker(mutex_);
4881 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4882 for (; i != reservedEntries_.end(); ++i)
4884 const BufferEntry& entry = *i;
4885 derived()._releaseBufferEntry(entry);
4887 reservedEntries_.clear();
4888 currentReservedSize = 0;
4892 struct CLBufferEntry
4896 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4899 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4902 typedef struct CLBufferEntry BufferEntry;
4906 OpenCLBufferPoolImpl(int createFlags = 0)
4907 : createFlags_(createFlags)
4911 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4913 CV_DbgAssert(entry.clBuffer_ == NULL);
4914 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4915 Context& ctx = Context::getDefault();
4916 cl_int retval = CL_SUCCESS;
4917 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4918 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4919 CV_Assert(entry.clBuffer_ != NULL);
4920 if(retval == CL_SUCCESS)
4922 CV_IMPL_ADD(CV_IMPL_OCL);
4924 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4925 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4926 allocatedEntries_.push_back(entry);
4929 void _releaseBufferEntry(const BufferEntry& entry)
4931 CV_Assert(entry.capacity_ != 0);
4932 CV_Assert(entry.clBuffer_ != NULL);
4933 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4934 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4935 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4939 #ifdef HAVE_OPENCL_SVM
4940 struct CLSVMBufferEntry
4944 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4946 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4949 typedef struct CLSVMBufferEntry BufferEntry;
4951 OpenCLSVMBufferPoolImpl()
4955 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4957 CV_DbgAssert(entry.clBuffer_ == NULL);
4958 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4960 Context& ctx = Context::getDefault();
4961 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4962 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4963 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4964 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4966 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4967 CV_DbgAssert(svmFns->isValid());
4969 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4970 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4973 entry.clBuffer_ = buf;
4975 CV_IMPL_ADD(CV_IMPL_OCL);
4977 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4978 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4979 allocatedEntries_.push_back(entry);
4982 void _releaseBufferEntry(const BufferEntry& entry)
4984 CV_Assert(entry.capacity_ != 0);
4985 CV_Assert(entry.clBuffer_ != NULL);
4986 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4987 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4988 Context& ctx = Context::getDefault();
4989 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4990 CV_DbgAssert(svmFns->isValid());
4991 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4992 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4999 template <bool readAccess, bool writeAccess>
5000 class AlignedDataPtr
5004 uchar* const originPtr_;
5005 const size_t alignment_;
5007 uchar* allocatedPtr_;
5010 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
5011 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
5013 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5014 CV_DbgAssert(!readAccess || ptr);
5015 if (((size_t)ptr_ & (alignment - 1)) != 0)
5017 allocatedPtr_ = new uchar[size_ + alignment - 1];
5018 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5021 memcpy(ptr_, originPtr_, size_);
5026 uchar* getAlignedPtr() const
5028 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5038 memcpy(originPtr_, ptr_, size_);
5040 delete[] allocatedPtr_;
5041 allocatedPtr_ = NULL;
5046 AlignedDataPtr(const AlignedDataPtr&); // disabled
5047 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
5050 template <bool readAccess, bool writeAccess>
5051 class AlignedDataPtr2D
5055 uchar* const originPtr_;
5056 const size_t alignment_;
5058 uchar* allocatedPtr_;
5064 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
5065 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
5067 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
5068 CV_DbgAssert(!readAccess || ptr != NULL);
5069 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
5071 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
5072 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
5075 for (size_t i = 0; i < rows_; i++)
5076 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
5081 uchar* getAlignedPtr() const
5083 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
5093 for (size_t i = 0; i < rows_; i++)
5094 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
5096 delete[] allocatedPtr_;
5097 allocatedPtr_ = NULL;
5102 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
5103 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
5106 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
5107 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
5111 void Context::Impl::__init_buffer_pools()
5113 bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
5114 OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
5115 bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
5116 OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
5118 size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
5119 size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
5120 bufferPool.setMaxReservedSize(poolSize);
5121 size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
5122 bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
5124 #ifdef HAVE_OPENCL_SVM
5125 bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
5126 OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
5127 size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
5128 bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
5131 CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
5134 class OpenCLAllocator CV_FINAL : public MatAllocator
5139 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
5140 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
5141 #ifdef HAVE_OPENCL_SVM
5142 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
5144 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
5149 matStdAllocator = Mat::getDefaultAllocator();
5153 flushCleanupQueue();
5156 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
5157 AccessFlag flags, UMatUsageFlags usageFlags) const
5159 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
5163 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
5165 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
5168 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
5170 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
5174 void getBestFlags(const Context& ctx, AccessFlag /*flags*/, UMatUsageFlags usageFlags, int& createFlags, UMatData::MemoryFlag& flags0) const
5176 const Device& dev = ctx.device(0);
5178 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
5179 createFlags |= CL_MEM_ALLOC_HOST_PTR;
5181 if (!isOpenCLCopyingForced() &&
5182 (isOpenCLMapForced() ||
5183 (dev.hostUnifiedMemory()
5190 flags0 = static_cast<UMatData::MemoryFlag>(0);
5192 flags0 = UMatData::COPY_ON_MAP;
5195 UMatData* allocate(int dims, const int* sizes, int type,
5196 void* data, size_t* step, AccessFlag flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5199 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5201 flushCleanupQueue();
5203 CV_Assert(data == 0);
5204 size_t total = CV_ELEM_SIZE(type);
5205 for( int i = dims-1; i >= 0; i-- )
5212 Context& ctx = Context::getDefault();
5214 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5215 Context::Impl& ctxImpl = *ctx.getImpl();
5217 int createFlags = 0;
5218 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5219 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
5221 void* handle = NULL;
5222 int allocatorFlags = 0;
5224 #ifdef HAVE_OPENCL_SVM
5225 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5226 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
5228 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
5229 handle = ctxImpl.getBufferPoolSVM().allocate(total);
5231 // this property is constant, so single buffer pool can be used here
5232 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5233 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5237 if (createFlags == 0)
5239 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
5240 handle = ctxImpl.getBufferPool().allocate(total);
5242 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
5244 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
5245 handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
5249 CV_Assert(handle != NULL); // Unsupported, throw
5253 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
5255 UMatData* u = new UMatData(this);
5260 u->allocatorFlags_ = allocatorFlags;
5261 u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
5262 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
5263 u->markHostCopyObsolete(true);
5264 opencl_allocator_stats.onAllocate(u->size);
5268 bool allocate(UMatData* u, AccessFlag accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
5273 flushCleanupQueue();
5275 UMatDataAutoLock lock(u);
5279 CV_Assert(u->origdata != 0);
5280 Context& ctx = Context::getDefault();
5281 int createFlags = 0;
5282 UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
5283 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
5285 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
5287 cl_context ctx_handle = (cl_context)ctx.ptr();
5288 int allocatorFlags = 0;
5289 UMatData::MemoryFlag tempUMatFlags = static_cast<UMatData::MemoryFlag>(0);
5290 void* handle = NULL;
5291 cl_int retval = CL_SUCCESS;
5293 #ifdef HAVE_OPENCL_SVM
5294 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
5295 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
5296 if (useSVM && svmCaps.isSupportFineGrainSystem())
5298 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
5299 tempUMatFlags = UMatData::TEMP_UMAT;
5300 handle = u->origdata;
5301 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
5303 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
5305 if (!(accessFlags & ACCESS_FAST)) // memcpy used
5307 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
5309 cl_svm_mem_flags memFlags = createFlags |
5310 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
5312 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5313 CV_DbgAssert(svmFns->isValid());
5315 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
5316 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
5319 cl_command_queue q = NULL;
5320 if (!isFineGrainBuffer)
5322 q = (cl_command_queue)Queue::getDefault().ptr();
5323 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
5324 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
5327 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5330 memcpy(handle, u->origdata, u->size);
5331 if (!isFineGrainBuffer)
5333 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
5334 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
5335 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5338 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
5339 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
5340 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
5347 accessFlags &= ~ACCESS_FAST;
5349 tempUMatFlags = UMatData::TEMP_UMAT;
5354 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
5355 // There are OpenCL runtime issues for less aligned data
5356 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
5357 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
5358 // Avoid sharing of host memory between OpenCL buffers
5359 && !(u->originalUMatData && u->originalUMatData->handle)
5362 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
5363 u->size, u->origdata, &retval);
5364 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
5365 (long long int)u->size, u->origdata, (void*)handle).c_str());
5367 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
5369 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
5370 u->size, u->origdata, &retval);
5371 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
5372 (long long int)u->size, u->origdata, (void*)handle).c_str());
5373 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
5376 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
5377 if(!handle || retval != CL_SUCCESS)
5380 u->prevAllocator = u->currAllocator;
5381 u->currAllocator = this;
5382 u->flags |= tempUMatFlags | flags0;
5383 u->allocatorFlags_ = allocatorFlags;
5385 if (!!(accessFlags & ACCESS_WRITE))
5386 u->markHostCopyObsolete(true);
5387 opencl_allocator_stats.onAllocate(u->size);
5391 /*void sync(UMatData* u) const
5393 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5394 UMatDataAutoLock lock(u);
5396 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
5398 if( u->tempCopiedUMat() )
5400 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5401 u->size, u->origdata, 0, 0, 0);
5406 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5407 (CL_MAP_READ | CL_MAP_WRITE),
5408 0, u->size, 0, 0, 0, &retval);
5409 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5412 u->markHostCopyObsolete(false);
5414 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
5416 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5417 u->size, u->data, 0, 0, 0);
5421 void deallocate(UMatData* u) const CV_OVERRIDE
5426 CV_Assert(u->urefcount == 0);
5427 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
5429 CV_Assert(u->handle != 0);
5430 CV_Assert(u->mapcount == 0);
5432 if (!!(u->flags & UMatData::ASYNC_CLEANUP))
5433 addToCleanupQueue(u);
5438 void deallocate_(UMatData* u) const
5441 CV_Assert(u->handle);
5442 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
5444 opencl_allocator_stats.onFree(u->size);
5448 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
5449 return; // avoid any OpenCL calls
5453 CV_Assert(u->origdata);
5454 // UMatDataAutoLock lock(u);
5456 if (u->hostCopyObsolete())
5458 #ifdef HAVE_OPENCL_SVM
5459 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5461 Context& ctx = Context::getDefault();
5462 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5463 CV_DbgAssert(svmFns->isValid());
5465 if( u->tempCopiedUMat() )
5467 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5468 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
5469 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
5470 cl_command_queue q = NULL;
5471 if (!isFineGrainBuffer)
5473 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
5474 q = (cl_command_queue)Queue::getDefault().ptr();
5475 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5476 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5479 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5482 memcpy(u->origdata, u->handle, u->size);
5483 if (!isFineGrainBuffer)
5485 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5486 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5487 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5492 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
5499 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5500 if( u->tempCopiedUMat() )
5502 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5503 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
5504 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
5511 CV_Assert(u->mapcount == 0);
5512 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
5513 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5514 (CL_MAP_READ | CL_MAP_WRITE),
5515 0, u->size, 0, 0, 0, &retval);
5516 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
5517 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
5518 if (u->originalUMatData)
5520 CV_Assert(u->originalUMatData->data == data);
5522 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
5523 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
5524 CV_OCL_DBG_CHECK(clFinish(q));
5528 u->markHostCopyObsolete(false);
5534 #ifdef HAVE_OPENCL_SVM
5535 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5537 if( u->tempCopiedUMat() )
5539 Context& ctx = Context::getDefault();
5540 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5541 CV_DbgAssert(svmFns->isValid());
5543 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
5544 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
5550 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
5551 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
5554 u->markDeviceCopyObsolete(true);
5555 u->currAllocator = u->prevAllocator;
5556 u->prevAllocator = NULL;
5557 if(u->data && u->copyOnMap() && u->data != u->origdata)
5559 u->data = u->origdata;
5560 u->currAllocator->deallocate(u);
5565 CV_Assert(u->origdata == NULL);
5566 if(u->data && u->copyOnMap() && u->data != u->origdata)
5570 u->markHostCopyObsolete(true);
5572 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
5574 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5576 ocl::Context& ctx = *pCtx.get();
5577 CV_Assert(ctx.getImpl());
5578 ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
5580 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
5582 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5584 ocl::Context& ctx = *pCtx.get();
5585 CV_Assert(ctx.getImpl());
5586 ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
5588 #ifdef HAVE_OPENCL_SVM
5589 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
5591 std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
5593 ocl::Context& ctx = *pCtx.get();
5594 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5598 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5599 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5601 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5602 CV_DbgAssert(svmFns->isValid());
5603 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5605 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5607 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5608 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5609 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5612 CV_Assert(ctx.getImpl());
5613 ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
5618 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5621 u->markDeviceCopyObsolete(true);
5625 CV_Assert(u == NULL);
5628 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
5629 void map(UMatData* u, AccessFlag accessFlags) const CV_OVERRIDE
5631 CV_Assert(u && u->handle);
5633 if (!!(accessFlags & ACCESS_WRITE))
5634 u->markDeviceCopyObsolete(true);
5636 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5639 if( !u->copyOnMap() )
5642 // because there can be other map requests for the same UMat with different access flags,
5643 // we use the universal (read-write) access mode.
5644 #ifdef HAVE_OPENCL_SVM
5645 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5647 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5649 Context& ctx = Context::getDefault();
5650 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5651 CV_DbgAssert(svmFns->isValid());
5653 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5655 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5656 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5659 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5660 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5664 u->data = (uchar*)u->handle;
5665 u->markHostCopyObsolete(false);
5666 u->markDeviceMemMapped(true);
5671 cl_int retval = CL_SUCCESS;
5672 if (!u->deviceMemMapped())
5674 CV_Assert(u->refcount == 1);
5675 CV_Assert(u->mapcount++ == 0);
5676 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5677 (CL_MAP_READ | CL_MAP_WRITE),
5678 0, u->size, 0, 0, 0, &retval);
5679 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5681 if (u->data && retval == CL_SUCCESS)
5683 u->markHostCopyObsolete(false);
5684 u->markDeviceMemMapped(true);
5688 // TODO Is it really a good idea and was it tested well?
5689 // if map failed, switch to copy-on-map mode for the particular buffer
5690 u->flags |= UMatData::COPY_ON_MAP;
5695 u->data = (uchar*)fastMalloc(u->size);
5696 u->markHostCopyObsolete(true);
5700 if (!!(accessFlags & ACCESS_READ) && u->hostCopyObsolete())
5702 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5703 #ifdef HAVE_OPENCL_SVM
5704 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5706 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5707 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5708 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5709 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5710 u->markHostCopyObsolete(false);
5714 void unmap(UMatData* u) const CV_OVERRIDE
5720 CV_Assert(u->handle != 0);
5722 UMatDataAutoLock autolock(u);
5724 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5726 if( !u->copyOnMap() && u->deviceMemMapped() )
5728 CV_Assert(u->data != NULL);
5729 #ifdef HAVE_OPENCL_SVM
5730 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5732 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5734 Context& ctx = Context::getDefault();
5735 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5736 CV_DbgAssert(svmFns->isValid());
5738 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5740 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5741 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5743 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5745 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5748 if (u->refcount == 0)
5750 u->markDeviceCopyObsolete(false);
5751 u->markHostCopyObsolete(true);
5755 if (u->refcount == 0)
5757 CV_Assert(u->mapcount-- == 1);
5758 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5759 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5760 if (Device::getDefault().isAMD())
5762 // required for multithreaded applications (see stitching test)
5763 CV_OCL_DBG_CHECK(clFinish(q));
5765 u->markDeviceMemMapped(false);
5767 u->markDeviceCopyObsolete(false);
5768 u->markHostCopyObsolete(true);
5771 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5773 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5774 #ifdef HAVE_OPENCL_SVM
5775 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5777 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5778 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5779 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5780 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5781 u->markDeviceCopyObsolete(false);
5782 u->markHostCopyObsolete(true);
5786 bool checkContinuous(int dims, const size_t sz[],
5787 const size_t srcofs[], const size_t srcstep[],
5788 const size_t dstofs[], const size_t dststep[],
5789 size_t& total, size_t new_sz[],
5790 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5791 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5793 bool iscontinuous = true;
5794 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5795 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5797 for( int i = dims-2; i >= 0; i-- )
5799 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5800 iscontinuous = false;
5803 srcrawofs += srcofs[i]*srcstep[i];
5805 dstrawofs += dstofs[i]*dststep[i];
5810 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5813 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5814 // we assume that new_... arrays are initialized by caller
5815 // with 0's, so there is no else branch
5818 new_srcofs[0] = srcofs[1];
5819 new_srcofs[1] = srcofs[0];
5825 new_dstofs[0] = dstofs[1];
5826 new_dstofs[1] = dstofs[0];
5830 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5831 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5835 // we could check for dims == 3 here,
5836 // but from user perspective this one is more informative
5837 CV_Assert(dims <= 3);
5838 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5841 new_srcofs[0] = srcofs[2];
5842 new_srcofs[1] = srcofs[1];
5843 new_srcofs[2] = srcofs[0];
5848 new_dstofs[0] = dstofs[2];
5849 new_dstofs[1] = dstofs[1];
5850 new_dstofs[2] = dstofs[0];
5853 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5854 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5857 return iscontinuous;
5860 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5861 const size_t srcofs[], const size_t srcstep[],
5862 const size_t dststep[]) const CV_OVERRIDE
5866 UMatDataAutoLock autolock(u);
5868 if( u->data && !u->hostCopyObsolete() )
5870 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5873 CV_Assert( u->handle != 0 );
5875 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5877 size_t total = 0, new_sz[] = {0, 0, 0};
5878 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5879 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5881 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5883 srcrawofs, new_srcofs, new_srcstep,
5884 dstrawofs, new_dstofs, new_dststep);
5886 #ifdef HAVE_OPENCL_SVM
5887 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5889 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5890 Context& ctx = Context::getDefault();
5891 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5892 CV_DbgAssert(svmFns->isValid());
5894 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5895 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5897 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5898 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5901 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5906 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5910 // This code is from MatAllocator::download()
5911 int isz[CV_MAX_DIM];
5912 uchar* srcptr = (uchar*)u->handle;
5913 for( int i = 0; i < dims; i++ )
5915 CV_Assert( sz[i] <= (size_t)INT_MAX );
5919 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5920 isz[i] = (int)sz[i];
5923 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5924 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5926 const Mat* arrays[] = { &src, &dst };
5928 NAryMatIterator it(arrays, ptrs, 2);
5929 size_t j, planesz = it.size;
5931 for( j = 0; j < it.nplanes; j++, ++it )
5932 memcpy(ptrs[1], ptrs[0], planesz);
5934 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5936 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5937 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5939 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5948 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5949 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5950 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5952 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5954 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5955 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5956 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5957 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5958 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5959 uchar* ptr = alignedPtr.getAlignedPtr();
5961 CV_Assert(new_srcstep[0] >= new_sz[0]);
5962 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5963 total = std::min(total, u->size - new_srcrawofs);
5964 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5965 new_srcrawofs, total, ptr, 0, 0, 0));
5966 for( size_t i = 0; i < new_sz[1]; i++ )
5967 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5971 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5972 uchar* ptr = alignedPtr.getAlignedPtr();
5974 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5975 new_srcofs, new_dstofs, new_sz,
5983 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5984 const size_t dstofs[], const size_t dststep[],
5985 const size_t srcstep[]) const CV_OVERRIDE
5990 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5991 CV_Assert(u->refcount == 0 || u->tempUMat());
5993 size_t total = 0, new_sz[] = {0, 0, 0};
5994 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5995 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5997 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5999 srcrawofs, new_srcofs, new_srcstep,
6000 dstrawofs, new_dstofs, new_dststep);
6002 UMatDataAutoLock autolock(u);
6004 // if there is cached CPU copy of the GPU matrix,
6005 // we could use it as a destination.
6006 // we can do it in 2 cases:
6007 // 1. we overwrite the whole content
6008 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
6009 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
6011 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
6012 u->markHostCopyObsolete(false);
6013 u->markDeviceCopyObsolete(true);
6017 CV_Assert( u->handle != 0 );
6018 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6020 #ifdef HAVE_OPENCL_SVM
6021 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6023 CV_DbgAssert(u->data == NULL || u->data == u->handle);
6024 Context& ctx = Context::getDefault();
6025 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6026 CV_DbgAssert(svmFns->isValid());
6028 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
6029 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6031 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
6032 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
6035 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
6040 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
6044 // This code is from MatAllocator::upload()
6045 int isz[CV_MAX_DIM];
6046 uchar* dstptr = (uchar*)u->handle;
6047 for( int i = 0; i < dims; i++ )
6049 CV_Assert( sz[i] <= (size_t)INT_MAX );
6053 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6054 isz[i] = (int)sz[i];
6057 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
6058 Mat dst(dims, isz, CV_8U, dstptr, dststep);
6060 const Mat* arrays[] = { &src, &dst };
6062 NAryMatIterator it(arrays, ptrs, 2);
6063 size_t j, planesz = it.size;
6065 for( j = 0; j < it.nplanes; j++, ++it )
6066 memcpy(ptrs[1], ptrs[0], planesz);
6068 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
6070 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
6071 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
6073 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
6082 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
6083 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6084 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
6085 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
6086 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
6088 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6090 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6091 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6092 size_t membuf_ofs = dstrawofs - new_dstrawofs;
6093 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
6094 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6095 uchar* ptr = alignedPtr.getAlignedPtr();
6097 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6098 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
6099 total = std::min(total, u->size - new_dstrawofs);
6100 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
6101 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
6102 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
6103 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
6104 new_dstrawofs, total, ptr, 0, 0, 0));
6105 for( size_t i = 0; i < new_sz[1]; i++ )
6106 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
6107 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
6108 new_dstrawofs, total, ptr, 0, 0, 0));
6112 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
6113 uchar* ptr = alignedPtr.getAlignedPtr();
6115 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
6116 new_dstofs, new_srcofs, new_sz,
6122 u->markHostCopyObsolete(true);
6123 #ifdef HAVE_OPENCL_SVM
6124 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6125 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6132 u->markHostCopyObsolete(true);
6134 u->markDeviceCopyObsolete(false);
6137 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
6138 const size_t srcofs[], const size_t srcstep[],
6139 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
6144 size_t total = 0, new_sz[] = {0, 0, 0};
6145 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
6146 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
6148 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
6150 srcrawofs, new_srcofs, new_srcstep,
6151 dstrawofs, new_dstofs, new_dststep);
6153 UMatDataAutoLock src_autolock(src, dst);
6155 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
6157 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6160 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
6162 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6163 dst->markHostCopyObsolete(false);
6164 #ifdef HAVE_OPENCL_SVM
6165 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6166 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6173 dst->markDeviceCopyObsolete(true);
6178 // there should be no user-visible CPU copies of the UMat which we are going to copy to
6179 CV_Assert(dst->refcount == 0);
6180 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6182 cl_int retval = CL_SUCCESS;
6183 #ifdef HAVE_OPENCL_SVM
6184 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
6185 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6187 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
6188 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6190 Context& ctx = Context::getDefault();
6191 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
6192 CV_DbgAssert(svmFns->isValid());
6196 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
6197 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
6198 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
6199 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
6200 total, 0, NULL, NULL);
6201 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
6206 // This code is from MatAllocator::download()/upload()
6207 int isz[CV_MAX_DIM];
6208 uchar* srcptr = (uchar*)src->handle;
6209 for( int i = 0; i < dims; i++ )
6211 CV_Assert( sz[i] <= (size_t)INT_MAX );
6215 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
6216 isz[i] = (int)sz[i];
6218 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
6220 uchar* dstptr = (uchar*)dst->handle;
6221 for( int i = 0; i < dims; i++ )
6224 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
6226 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
6228 const Mat* arrays[] = { &m_src, &m_dst };
6230 NAryMatIterator it(arrays, ptrs, 2);
6231 size_t j, planesz = it.size;
6233 for( j = 0; j < it.nplanes; j++, ++it )
6234 memcpy(ptrs[1], ptrs[0], planesz);
6239 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
6241 map(src, ACCESS_READ);
6242 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
6247 map(dst, ACCESS_WRITE);
6248 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
6258 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6259 srcrawofs, dstrawofs, total, 0, 0, 0);
6260 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
6261 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
6263 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
6265 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
6266 size_t new_srcrawofs = srcrawofs & ~(padding-1);
6267 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
6268 size_t new_dstrawofs = dstrawofs & ~(padding-1);
6269 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
6271 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
6272 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6273 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
6274 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
6275 uchar* srcptr = srcBuf.getAlignedPtr();
6276 uchar* dstptr = dstBuf.getAlignedPtr();
6278 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
6280 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
6281 src_total = std::min(src_total, src->size - new_srcrawofs);
6282 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
6283 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
6285 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
6286 new_srcrawofs, src_total, srcptr, 0, 0, 0));
6287 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6288 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6290 for( size_t i = 0; i < new_sz[1]; i++ )
6291 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
6292 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
6293 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
6294 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
6298 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
6299 new_srcofs, new_dstofs, new_sz,
6305 if (retval == CL_SUCCESS)
6307 CV_IMPL_ADD(CV_IMPL_OCL)
6310 #ifdef HAVE_OPENCL_SVM
6311 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
6312 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
6319 dst->markHostCopyObsolete(true);
6321 dst->markDeviceCopyObsolete(false);
6325 CV_OCL_DBG_CHECK(clFinish(q));
6329 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
6331 ocl::Context ctx = Context::getDefault();
6334 #ifdef HAVE_OPENCL_SVM
6335 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
6337 return &ctx.getImpl()->getBufferPoolSVM();
6340 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
6342 return &ctx.getImpl()->getBufferPoolHostPtr();
6344 if (id != NULL && strcmp(id, "OCL") != 0)
6346 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
6348 return &ctx.getImpl()->getBufferPool();
6351 MatAllocator* matStdAllocator;
6353 mutable cv::Mutex cleanupQueueMutex;
6354 mutable std::deque<UMatData*> cleanupQueue;
6356 void flushCleanupQueue() const
6358 if (!cleanupQueue.empty())
6360 std::deque<UMatData*> q;
6362 cv::AutoLock lock(cleanupQueueMutex);
6363 q.swap(cleanupQueue);
6365 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
6371 void addToCleanupQueue(UMatData* u) const
6373 //TODO: Validation check: CV_Assert(!u->tempUMat());
6375 cv::AutoLock lock(cleanupQueueMutex);
6376 cleanupQueue.push_back(u);
6381 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
6383 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
6386 MatAllocator* getOpenCLAllocator()
6388 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
6391 }} // namespace cv::ocl
6396 // three funcs below are implemented in umatrix.cpp
6397 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
6398 bool autoSteps = false );
6399 void finalizeHdr(UMat& m);
6404 namespace cv { namespace ocl {
6407 // Convert OpenCL buffer memory to UMat
6409 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
6412 int sizes[] = { rows, cols };
6414 CV_Assert(0 <= d && d <= CV_MAX_DIM);
6418 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
6419 dst.usageFlags = USAGE_DEFAULT;
6421 setSize(dst, d, sizes, 0, true);
6424 cl_mem memobj = (cl_mem)cl_mem_buffer;
6425 cl_mem_object_type mem_type = 0;
6427 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6429 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
6432 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
6434 CV_OCL_CHECK(clRetainMemObject(memobj));
6436 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
6437 CV_Assert(total >= rows * step);
6439 // attach clBuffer to UMatData
6440 dst.u = new UMatData(getOpenCLAllocator());
6442 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
6443 dst.u->flags = static_cast<UMatData::MemoryFlag>(0);
6444 dst.u->handle = cl_mem_buffer;
6445 dst.u->origdata = 0;
6446 dst.u->prevAllocator = 0;
6447 dst.u->size = total;
6453 } // convertFromBuffer()
6457 // Convert OpenCL image2d_t memory to UMat
6459 void convertFromImage(void* cl_mem_image, UMat& dst)
6461 cl_mem clImage = (cl_mem)cl_mem_image;
6462 cl_mem_object_type mem_type = 0;
6464 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
6466 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
6468 cl_image_format fmt = { 0, 0 };
6469 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
6472 switch (fmt.image_channel_data_type)
6475 case CL_UNSIGNED_INT8:
6480 case CL_SIGNED_INT8:
6484 case CL_UNORM_INT16:
6485 case CL_UNSIGNED_INT16:
6489 case CL_SNORM_INT16:
6490 case CL_SIGNED_INT16:
6494 case CL_SIGNED_INT32:
6503 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
6507 switch (fmt.image_channel_order)
6510 type = CV_MAKE_TYPE(depth, 1);
6516 type = CV_MAKE_TYPE(depth, 4);
6520 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
6525 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
6528 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
6531 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
6533 dst.create((int)h, (int)w, type);
6535 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
6537 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
6540 size_t src_origin[3] = { 0, 0, 0 };
6541 size_t region[3] = { w, h, 1 };
6542 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
6544 CV_OCL_CHECK(clFinish(q));
6547 } // convertFromImage()
6550 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
6552 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
6554 cl_uint numDevices = 0;
6555 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
6556 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
6558 CV_OCL_DBG_CHECK_RESULT(status,
6559 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
6562 if (numDevices == 0)
6568 devices.resize((size_t)numDevices);
6569 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
6572 struct PlatformInfo::Impl
6577 handle = *(cl_platform_id*)id;
6578 getDevices(devices, handle);
6580 version_ = getStrProp(CL_PLATFORM_VERSION);
6581 parseOpenCLVersion(version_, versionMajor_, versionMinor_);
6584 String getStrProp(cl_platform_info prop) const
6588 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
6589 sz < sizeof(buf) ? String(buf) : String();
6592 IMPLEMENT_REFCOUNTABLE();
6593 std::vector<cl_device_id> devices;
6594 cl_platform_id handle;
6601 PlatformInfo::PlatformInfo()
6606 PlatformInfo::PlatformInfo(void* platform_id)
6608 p = new Impl(platform_id);
6611 PlatformInfo::~PlatformInfo()
6617 PlatformInfo::PlatformInfo(const PlatformInfo& i)
6624 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6637 int PlatformInfo::deviceNumber() const
6639 return p ? (int)p->devices.size() : 0;
6642 void PlatformInfo::getDevice(Device& device, int d) const
6644 CV_Assert(p && d < (int)p->devices.size() );
6646 device.set(p->devices[d]);
6649 String PlatformInfo::name() const
6651 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6654 String PlatformInfo::vendor() const
6656 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6659 String PlatformInfo::version() const
6661 return p ? p->version_ : String();
6664 int PlatformInfo::versionMajor() const
6667 return p->versionMajor_;
6670 int PlatformInfo::versionMinor() const
6673 return p->versionMinor_;
6676 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6678 cl_uint numPlatforms = 0;
6679 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6681 if (numPlatforms == 0)
6687 platforms.resize((size_t)numPlatforms);
6688 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6691 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6693 std::vector<cl_platform_id> platforms;
6694 getPlatforms(platforms);
6696 for (size_t i = 0; i < platforms.size(); i++)
6697 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6700 const char* typeToStr(int type)
6702 static const char* tab[]=
6704 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6705 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6706 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6707 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6708 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6709 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6710 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6711 "half", "half2", "half3", "half4", 0, 0, 0, "half8", 0, 0, 0, 0, 0, 0, 0, "half16",
6712 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6714 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6715 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6720 const char* memopTypeToStr(int type)
6722 static const char* tab[] =
6724 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6725 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6726 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6727 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6728 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6729 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6730 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6731 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6732 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6734 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6735 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6740 const char* vecopTypeToStr(int type)
6742 static const char* tab[] =
6744 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6745 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6746 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6747 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6748 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6749 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6750 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6751 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6752 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6754 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6755 const char* result = cn > 16 ? 0 : tab[depth*16 + cn-1];
6760 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6762 if( sdepth == ddepth )
6764 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6765 if( ddepth >= CV_32F ||
6766 (ddepth == CV_32S && sdepth < CV_32S) ||
6767 (ddepth == CV_16S && sdepth <= CV_8S) ||
6768 (ddepth == CV_16U && sdepth == CV_8U))
6770 sprintf(buf, "convert_%s", typestr);
6772 else if( sdepth >= CV_32F )
6773 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6775 sprintf(buf, "convert_%s_sat", typestr);
6780 const char* getOpenCLErrorString(int errorCode)
6782 #define CV_OCL_CODE(id) case id: return #id
6783 #define CV_OCL_CODE_(id, name) case id: return #name
6786 CV_OCL_CODE(CL_SUCCESS);
6787 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6788 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6789 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6790 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6791 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6792 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6793 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6794 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6795 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6796 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6797 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6798 CV_OCL_CODE(CL_MAP_FAILURE);
6799 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6800 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6801 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6802 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6803 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6804 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6805 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6806 CV_OCL_CODE(CL_INVALID_VALUE);
6807 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6808 CV_OCL_CODE(CL_INVALID_PLATFORM);
6809 CV_OCL_CODE(CL_INVALID_DEVICE);
6810 CV_OCL_CODE(CL_INVALID_CONTEXT);
6811 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6812 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6813 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6814 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6815 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6816 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6817 CV_OCL_CODE(CL_INVALID_SAMPLER);
6818 CV_OCL_CODE(CL_INVALID_BINARY);
6819 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6820 CV_OCL_CODE(CL_INVALID_PROGRAM);
6821 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6822 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6823 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6824 CV_OCL_CODE(CL_INVALID_KERNEL);
6825 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6826 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6827 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6828 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6829 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6830 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6831 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6832 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6833 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6834 CV_OCL_CODE(CL_INVALID_EVENT);
6835 CV_OCL_CODE(CL_INVALID_OPERATION);
6836 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6837 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6838 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6839 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6841 CV_OCL_CODE(CL_INVALID_PROPERTY);
6843 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6844 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6845 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6846 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6848 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6849 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6851 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6852 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6853 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6854 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6855 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6856 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6857 default: return "Unknown OpenCL error";
6863 template <typename T>
6864 static std::string kerToStr(const Mat & k)
6866 int width = k.cols - 1, depth = k.depth();
6867 const T * const data = k.ptr<T>();
6869 std::ostringstream stream;
6870 stream.precision(10);
6874 for (int i = 0; i < width; ++i)
6875 stream << "DIG(" << (int)data[i] << ")";
6876 stream << "DIG(" << (int)data[width] << ")";
6878 else if (depth == CV_32F)
6880 stream.setf(std::ios_base::showpoint);
6881 for (int i = 0; i < width; ++i)
6882 stream << "DIG(" << data[i] << "f)";
6883 stream << "DIG(" << data[width] << "f)";
6887 for (int i = 0; i < width; ++i)
6888 stream << "DIG(" << data[i] << ")";
6889 stream << "DIG(" << data[width] << ")";
6892 return stream.str();
6895 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6897 Mat kernel = _kernel.getMat().reshape(1, 1);
6899 int depth = kernel.depth();
6903 if (ddepth != depth)
6904 kernel.convertTo(kernel, ddepth);
6906 typedef std::string (* func_t)(const Mat &);
6907 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6908 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6909 const func_t func = funcs[ddepth];
6910 CV_Assert(func != 0);
6912 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6915 #define PROCESS_SRC(src) \
6920 CV_Assert(src.isMat() || src.isUMat()); \
6921 Size csize = src.size(); \
6922 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6923 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6924 if (cwidth < ckercn || ckercn <= 0) \
6926 cols.push_back(cwidth); \
6927 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6929 offsets.push_back(src.offset()); \
6930 steps.push_back(src.step()); \
6931 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6932 kercns.push_back(ckercn); \
6937 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6938 InputArray src4, InputArray src5, InputArray src6,
6939 InputArray src7, InputArray src8, InputArray src9,
6940 OclVectorStrategy strat)
6942 const ocl::Device & d = ocl::Device::getDefault();
6944 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6945 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6946 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6947 d.preferredVectorWidthDouble(), -1 };
6949 // if the device says don't use vectors
6950 if (vectorWidths[0] == 1)
6953 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6954 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6955 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6958 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6961 int checkOptimalVectorWidth(const int *vectorWidths,
6962 InputArray src1, InputArray src2, InputArray src3,
6963 InputArray src4, InputArray src5, InputArray src6,
6964 InputArray src7, InputArray src8, InputArray src9,
6965 OclVectorStrategy strat)
6967 CV_Assert(vectorWidths);
6969 int ref_type = src1.type();
6971 std::vector<size_t> offsets, steps, cols;
6972 std::vector<int> dividers, kercns;
6983 size_t size = offsets.size();
6985 for (size_t i = 0; i < size; ++i)
6986 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6987 dividers[i] >>= 1, kercns[i] >>= 1;
6990 int kercn = *std::min_element(kercns.begin(), kercns.end());
6995 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6996 InputArray src4, InputArray src5, InputArray src6,
6997 InputArray src7, InputArray src8, InputArray src9)
6999 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
7005 // TODO Make this as a method of OpenCL "BuildOptions" class
7006 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
7008 if (!buildOptions.empty())
7009 buildOptions += " ";
7010 int type = _m.type(), depth = CV_MAT_DEPTH(type);
7011 buildOptions += format(
7012 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
7013 name.c_str(), ocl::typeToStr(type),
7014 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
7015 name.c_str(), (int)CV_MAT_CN(type),
7016 name.c_str(), (int)CV_ELEM_SIZE(type),
7017 name.c_str(), (int)CV_ELEM_SIZE1(type),
7018 name.c_str(), (int)depth
7023 struct Image2D::Impl
7025 Impl(const UMat &src, bool norm, bool alias)
7029 init(src, norm, alias);
7035 clReleaseMemObject(handle);
7038 static cl_image_format getImageFormat(int depth, int cn, bool norm)
7040 cl_image_format format;
7041 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
7042 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
7043 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
7044 CL_SNORM_INT16, -1, -1, -1, -1 };
7045 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
7047 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
7048 int channelOrder = channelOrders[cn];
7049 format.image_channel_data_type = (cl_channel_type)channelType;
7050 format.image_channel_order = (cl_channel_order)channelOrder;
7054 static bool isFormatSupported(cl_image_format format)
7057 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7059 cl_context context = (cl_context)Context::getDefault().ptr();
7063 // Figure out how many formats are supported by this context.
7064 cl_uint numFormats = 0;
7065 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7066 CL_MEM_OBJECT_IMAGE2D, numFormats,
7068 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
7071 AutoBuffer<cl_image_format> formats(numFormats);
7072 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
7073 CL_MEM_OBJECT_IMAGE2D, numFormats,
7074 formats.data(), NULL);
7075 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
7076 for (cl_uint i = 0; i < numFormats; ++i)
7078 if (!memcmp(&formats[i], &format, sizeof(format)))
7087 void init(const UMat &src, bool norm, bool alias)
7090 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
7092 CV_Assert(!src.empty());
7093 CV_Assert(ocl::Device::getDefault().imageSupport());
7095 int err, depth = src.depth(), cn = src.channels();
7097 cl_image_format format = getImageFormat(depth, cn, norm);
7099 if (!isFormatSupported(format))
7100 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
7102 if (alias && !src.handle(ACCESS_RW))
7103 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
7105 cl_context context = (cl_context)Context::getDefault().ptr();
7106 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
7108 #ifdef CL_VERSION_1_2
7109 // this enables backwards portability to
7110 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
7111 const Device & d = ocl::Device::getDefault();
7112 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
7113 CV_Assert(!alias || canCreateAlias(src));
7114 if (1 < major || (1 == major && 2 <= minor))
7117 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
7118 desc.image_width = src.cols;
7119 desc.image_height = src.rows;
7120 desc.image_depth = 0;
7121 desc.image_array_size = 1;
7122 desc.image_row_pitch = alias ? src.step[0] : 0;
7123 desc.image_slice_pitch = 0;
7124 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
7125 desc.num_mip_levels = 0;
7126 desc.num_samples = 0;
7127 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
7132 CV_SUPPRESS_DEPRECATED_START
7133 CV_Assert(!alias); // This is an OpenCL 1.2 extension
7134 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
7135 CV_SUPPRESS_DEPRECATED_END
7137 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
7139 size_t origin[] = { 0, 0, 0 };
7140 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
7143 if (!alias && !src.isContinuous())
7145 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
7146 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
7147 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
7150 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
7151 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
7152 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
7153 CV_OCL_DBG_CHECK(clFlush(queue));
7157 devData = (cl_mem)src.handle(ACCESS_READ);
7159 CV_Assert(devData != NULL);
7163 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
7164 if (!src.isContinuous())
7166 CV_OCL_DBG_CHECK(clFlush(queue));
7167 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
7172 IMPLEMENT_REFCOUNTABLE();
7182 Image2D::Image2D(const UMat &src, bool norm, bool alias)
7184 p = new Impl(src, norm, alias);
7187 bool Image2D::canCreateAlias(const UMat &m)
7190 const Device & d = ocl::Device::getDefault();
7191 if (d.imageFromBufferSupport() && !m.empty())
7193 // This is the required pitch alignment in pixels
7194 uint pitchAlign = d.imagePitchAlignment();
7195 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
7197 // We don't currently handle the case where the buffer was created
7198 // with CL_MEM_USE_HOST_PTR
7199 if (!m.u->tempUMat())
7208 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
7210 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
7212 return Impl::isFormatSupported(format);
7215 Image2D::Image2D(const Image2D & i)
7222 Image2D & Image2D::operator = (const Image2D & i)
7241 void* Image2D::ptr() const
7243 return p ? p->handle : 0;
7246 bool internal::isOpenCLForced()
7248 static bool initialized = false;
7249 static bool value = false;
7252 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
7258 bool internal::isPerformanceCheckBypassed()
7260 static bool initialized = false;
7261 static bool value = false;
7264 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
7270 bool internal::isCLBuffer(UMat& u)
7272 void* h = u.handle(ACCESS_RW);
7275 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
7277 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
7280 cl_mem_object_type type = 0;
7281 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
7282 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
7292 Impl(const Queue& q)
7301 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7307 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
7311 uint64 durationNS() const
7313 return (uint64)(timer.getTimeSec() * 1e9);
7319 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
7320 Timer::~Timer() { delete p; }
7334 uint64 Timer::durationNS() const
7337 return p->durationNS();
7343 namespace cv { namespace directx { namespace internal {
7344 OpenCLDirectXImpl* getDirectXImpl(ocl::Context& ctx)
7346 ocl::Context::Impl* i = ctx.getImpl();
7348 return i->getDirectXImpl();
7350 }}} // namespace cv::directx::internal
7353 #endif // HAVE_OPENCL