1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
122 #include "umatrix.hpp"
124 namespace cv { namespace ocl {
126 #define IMPLEMENT_REFCOUNTABLE() \
127 void addref() { CV_XADD(&refcount, 1); } \
128 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
136 return opencl_allocator_stats;
140 static bool isRaiseError()
142 static bool initialized = false;
143 static bool value = false;
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
153 #if CV_OPENCL_TRACE_CHECK
155 void traceOpenCLCheck(cl_int status, const char* message)
157 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
164 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
165 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
167 #define CV_OCL_CHECK_RESULT(check_result, msg) \
169 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
170 if (check_result != CL_SUCCESS) \
172 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
173 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
174 CV_Error(Error::OpenCLApiCallError, error_msg); \
178 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
180 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
183 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
184 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
185 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
189 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
190 if (check_result != CL_SUCCESS && isRaiseError()) \
192 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
193 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
194 CV_Error(Error::OpenCLApiCallError, error_msg); \
197 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
198 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
202 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
203 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
204 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
205 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
207 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
208 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
211 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
212 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
220 static const String getBuildExtraOptions()
222 static String param_buildExtraOptions;
223 static bool initialized = false;
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
231 return param_buildExtraOptions;
234 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
235 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
240 UMat2D(const UMat& m)
242 offset = (int)m.offset;
255 UMat3D(const UMat& m)
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
275 static uint64 table[256];
276 static bool initialized = false;
280 for( int i = 0; i < 256; i++ )
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
308 OpenCLBinaryCacheConfigurator()
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
325 if (cache_path_.empty())
327 if (cache_path_ == "disabled")
329 if (!utils::fs::createDirectories(cache_path_))
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
359 catch (const cv::Exception& e)
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
370 if (CV_OPENCL_CACHE_WRITE)
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
381 catch (const cv::Exception& e)
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
387 if (!cache_path_.empty())
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
404 cache_lock_filename_.clear();
405 cache_lock_.release();
408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
411 if (cache_path_.empty())
412 return std::string();
414 AutoLock lock(mutex_prepared_contexts_);
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
438 catch (const cv::Exception& e)
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
463 if (!remove_entries.empty())
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
476 for (size_t i = 0; i < remove_entries.size(); i++)
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
485 catch (const cv::Exception& e)
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
507 class BinaryProgramFile
509 enum { MAX_ENTRIES = 64 };
511 typedef unsigned int uint32_t;
513 struct CV_DECL_ALIGNED(4) FileHeader
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
519 struct CV_DECL_ALIGNED(4) FileTable
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
525 struct CV_DECL_ALIGNED(4) FileEntry
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
540 uint32_t entryOffsets[MAX_ENTRIES];
542 uint32_t getHash(const std::string& options)
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
548 inline size_t getFileSize()
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
556 inline uint32_t readUInt32()
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
563 inline void writeUInt32(const uint32_t value)
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
570 inline void seekReadAbsolute(size_t pos)
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
575 inline void seekReadRelative(size_t pos)
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
581 inline void seekWriteAbsolute(size_t pos)
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
608 bool isValid = false;
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
630 catch (const cv::Exception& e)
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
649 bool read(const std::string& key, std::vector<char>& buf)
654 size_t fileSize = getFileSize();
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
679 uint32_t entryNum = getHash(key);
681 uint32_t entryOffset = entryOffsets[entryNum];
683 while (entryOffset > 0)
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
692 if (entry.keySize > 0)
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
707 if (entry.nextEntryFileOffset == 0)
709 entryOffset = entry.nextEntryFileOffset;
714 bool write(const std::string& key, std::vector<char>& buf)
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
730 size_t fileSize = getFileSize();
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
744 CV_Assert(!f.fail());
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
769 uint32_t entryNum = getHash(key);
771 uint32_t entryOffset = entryOffsets[entryNum];
773 while (entryOffset > 0)
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
782 if (entry.keySize > 0)
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
794 if (entry.nextEntryFileOffset == 0)
796 entryOffset = entry.nextEntryFileOffset;
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
832 // true if we have initialized OpenCL subsystem with available platforms
833 static bool g_isOpenCVActivated = false;
838 static bool g_isOpenCLInitialized = false;
839 static bool g_isOpenCLAvailable = false;
841 if (!g_isOpenCLInitialized)
843 CV_TRACE_REGION("Init_OpenCL_Runtime");
844 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
847 if (cv::String(envPath) == "disabled")
849 g_isOpenCLAvailable = false;
850 g_isOpenCLInitialized = true;
853 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
857 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
858 g_isOpenCVActivated = n > 0;
862 g_isOpenCLAvailable = false;
864 g_isOpenCLInitialized = true;
866 return g_isOpenCLAvailable;
871 CoreTLSData& data = getCoreTlsData();
872 if (data.useOpenCL < 0)
876 data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
883 return data.useOpenCL > 0;
886 bool isOpenCLActivated()
888 if (!g_isOpenCVActivated)
889 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
893 void setUseOpenCL(bool flag)
897 CoreTLSData& data = getCoreTlsData();
902 else if( haveOpenCL() )
904 data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
908 #ifdef HAVE_CLAMDBLAS
913 static AmdBlasHelper & getInstance()
915 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
918 bool isAvailable() const
920 return g_isAmdBlasAvailable;
935 if (!g_isAmdBlasInitialized)
937 AutoLock lock(getInitializationMutex());
939 if (!g_isAmdBlasInitialized)
945 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
949 g_isAmdBlasAvailable = false;
953 g_isAmdBlasAvailable = false;
955 g_isAmdBlasInitialized = true;
961 static bool g_isAmdBlasInitialized;
962 static bool g_isAmdBlasAvailable;
965 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
966 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
970 return AmdBlasHelper::getInstance().isAvailable();
987 static AmdFftHelper & getInstance()
989 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
992 bool isAvailable() const
994 return g_isAmdFftAvailable;
1001 // clAmdFftTeardown();
1009 if (!g_isAmdFftInitialized)
1011 AutoLock lock(getInitializationMutex());
1013 if (!g_isAmdFftInitialized)
1019 cl_uint major, minor, patch;
1020 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1022 // it throws exception in case AmdFft binaries are not found
1023 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1024 g_isAmdFftAvailable = true;
1026 catch (const Exception &)
1028 g_isAmdFftAvailable = false;
1032 g_isAmdFftAvailable = false;
1034 g_isAmdFftInitialized = true;
1040 static clAmdFftSetupData setupData;
1041 static bool g_isAmdFftInitialized;
1042 static bool g_isAmdFftAvailable;
1045 clAmdFftSetupData AmdFftHelper::setupData;
1046 bool AmdFftHelper::g_isAmdFftAvailable = false;
1047 bool AmdFftHelper::g_isAmdFftInitialized = false;
1051 return AmdFftHelper::getInstance().isAvailable();
1065 #ifdef HAVE_OPENCL_SVM
1074 Queue::getDefault().finish();
1077 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1079 struct Platform::Impl
1085 initialized = false;
1094 //cl_uint num_entries
1096 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1102 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1104 vendor = String(buf);
1111 IMPLEMENT_REFCOUNTABLE();
1113 cl_platform_id handle;
1118 Platform::Platform()
1123 Platform::~Platform()
1129 Platform::Platform(const Platform& pl)
1136 Platform& Platform::operator = (const Platform& pl)
1138 Impl* newp = (Impl*)pl.p;
1147 void* Platform::ptr() const
1149 return p ? p->handle : 0;
1152 Platform& Platform::getDefault()
1163 /////////////////////////////////////// Device ////////////////////////////////////////////
1165 // deviceVersion has format
1166 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1168 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1169 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1170 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1173 if (10 >= deviceVersion.length())
1175 const char *pstr = deviceVersion.c_str();
1176 if (0 != strncmp(pstr, "OpenCL ", 7))
1178 size_t ppos = deviceVersion.find('.', 7);
1179 if (String::npos == ppos)
1181 String temp = deviceVersion.substr(7, ppos - 7);
1182 major = atoi(temp.c_str());
1183 temp = deviceVersion.substr(ppos + 1);
1184 minor = atoi(temp.c_str());
1191 handle = (cl_device_id)d;
1194 name_ = getStrProp(CL_DEVICE_NAME);
1195 version_ = getStrProp(CL_DEVICE_VERSION);
1196 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1197 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1198 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1199 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1200 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1201 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1202 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1203 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1205 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1206 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1209 while (pos < extensions_.size())
1211 size_t pos2 = extensions_.find(' ', pos);
1212 if (pos2 == String::npos)
1213 pos2 = extensions_.size();
1216 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1217 extensions_set_.insert(extensionName);
1222 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1224 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1225 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1226 vendorName_ == "AMD")
1227 vendorID_ = VENDOR_AMD;
1228 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1229 vendorID_ = VENDOR_INTEL;
1230 else if (vendorName_ == "NVIDIA Corporation")
1231 vendorID_ = VENDOR_NVIDIA;
1233 vendorID_ = UNKNOWN_VENDOR;
1235 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1236 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1238 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1239 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1240 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1241 maxWorkGroupSize_ = new_maxWorkGroupSize;
1244 if (isExtensionSupported("cl_khr_spir"))
1246 #ifndef CL_DEVICE_SPIR_VERSIONS
1247 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1249 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1250 std::cout << spir_versions << std::endl;
1255 template<typename _TpCL, typename _TpOut>
1256 _TpOut getProp(cl_device_info prop) const
1261 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1262 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1265 bool getBoolProp(cl_device_info prop) const
1267 cl_bool temp = CL_FALSE;
1270 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1271 sz == sizeof(temp) ? temp != 0 : false;
1274 String getStrProp(cl_device_info prop) const
1278 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1279 sz < sizeof(buf) ? String(buf) : String();
1282 bool isExtensionSupported(const std::string& extensionName) const
1284 return extensions_set_.count(extensionName) > 0;
1288 IMPLEMENT_REFCOUNTABLE();
1290 cl_device_id handle;
1294 std::string extensions_;
1295 int doubleFPConfig_;
1296 bool hostUnifiedMemory_;
1297 int maxComputeUnits_;
1298 size_t maxWorkGroupSize_;
1301 int deviceVersionMajor_;
1302 int deviceVersionMinor_;
1303 String driverVersion_;
1306 bool intelSubgroupsSupport_;
1308 std::set<std::string> extensions_set_;
1317 Device::Device(void* d)
1323 Device::Device(const Device& d)
1330 Device& Device::operator = (const Device& d)
1332 Impl* newp = (Impl*)d.p;
1347 void Device::set(void* d)
1354 void* Device::ptr() const
1356 return p ? p->handle : 0;
1359 String Device::name() const
1360 { return p ? p->name_ : String(); }
1362 String Device::extensions() const
1363 { return p ? String(p->extensions_) : String(); }
1365 bool Device::isExtensionSupported(const String& extensionName) const
1366 { return p ? p->isExtensionSupported(extensionName) : false; }
1368 String Device::version() const
1369 { return p ? p->version_ : String(); }
1371 String Device::vendorName() const
1372 { return p ? p->vendorName_ : String(); }
1374 int Device::vendorID() const
1375 { return p ? p->vendorID_ : 0; }
1377 String Device::OpenCL_C_Version() const
1378 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1380 String Device::OpenCLVersion() const
1381 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1383 int Device::deviceVersionMajor() const
1384 { return p ? p->deviceVersionMajor_ : 0; }
1386 int Device::deviceVersionMinor() const
1387 { return p ? p->deviceVersionMinor_ : 0; }
1389 String Device::driverVersion() const
1390 { return p ? p->driverVersion_ : String(); }
1392 int Device::type() const
1393 { return p ? p->type_ : 0; }
1395 int Device::addressBits() const
1396 { return p ? p->addressBits_ : 0; }
1398 bool Device::available() const
1399 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1401 bool Device::compilerAvailable() const
1402 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1404 bool Device::linkerAvailable() const
1405 #ifdef CL_VERSION_1_2
1406 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1408 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1411 int Device::doubleFPConfig() const
1412 { return p ? p->doubleFPConfig_ : 0; }
1414 int Device::singleFPConfig() const
1415 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1417 int Device::halfFPConfig() const
1418 #ifdef CL_VERSION_1_2
1419 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1421 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1424 bool Device::endianLittle() const
1425 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1427 bool Device::errorCorrectionSupport() const
1428 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1430 int Device::executionCapabilities() const
1431 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1433 size_t Device::globalMemCacheSize() const
1434 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1436 int Device::globalMemCacheType() const
1437 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1439 int Device::globalMemCacheLineSize() const
1440 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1442 size_t Device::globalMemSize() const
1443 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1445 size_t Device::localMemSize() const
1446 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1448 int Device::localMemType() const
1449 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1451 bool Device::hostUnifiedMemory() const
1452 { return p ? p->hostUnifiedMemory_ : false; }
1454 bool Device::imageSupport() const
1455 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1457 bool Device::imageFromBufferSupport() const
1459 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1462 uint Device::imagePitchAlignment() const
1464 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1465 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1471 uint Device::imageBaseAddressAlignment() const
1473 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1474 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1480 size_t Device::image2DMaxWidth() const
1481 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1483 size_t Device::image2DMaxHeight() const
1484 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1486 size_t Device::image3DMaxWidth() const
1487 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1489 size_t Device::image3DMaxHeight() const
1490 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1492 size_t Device::image3DMaxDepth() const
1493 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1495 size_t Device::imageMaxBufferSize() const
1496 #ifdef CL_VERSION_1_2
1497 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1499 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1502 size_t Device::imageMaxArraySize() const
1503 #ifdef CL_VERSION_1_2
1504 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1506 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1509 bool Device::intelSubgroupsSupport() const
1510 { return p ? p->intelSubgroupsSupport_ : false; }
1512 int Device::maxClockFrequency() const
1513 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1515 int Device::maxComputeUnits() const
1516 { return p ? p->maxComputeUnits_ : 0; }
1518 int Device::maxConstantArgs() const
1519 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1521 size_t Device::maxConstantBufferSize() const
1522 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1524 size_t Device::maxMemAllocSize() const
1525 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1527 size_t Device::maxParameterSize() const
1528 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1530 int Device::maxReadImageArgs() const
1531 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1533 int Device::maxWriteImageArgs() const
1534 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1536 int Device::maxSamplers() const
1537 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1539 size_t Device::maxWorkGroupSize() const
1540 { return p ? p->maxWorkGroupSize_ : 0; }
1542 int Device::maxWorkItemDims() const
1543 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1545 void Device::maxWorkItemSizes(size_t* sizes) const
1549 const int MAX_DIMS = 32;
1551 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1552 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1556 int Device::memBaseAddrAlign() const
1557 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1559 int Device::nativeVectorWidthChar() const
1560 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1562 int Device::nativeVectorWidthShort() const
1563 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1565 int Device::nativeVectorWidthInt() const
1566 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1568 int Device::nativeVectorWidthLong() const
1569 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1571 int Device::nativeVectorWidthFloat() const
1572 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1574 int Device::nativeVectorWidthDouble() const
1575 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1577 int Device::nativeVectorWidthHalf() const
1578 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1580 int Device::preferredVectorWidthChar() const
1581 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1583 int Device::preferredVectorWidthShort() const
1584 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1586 int Device::preferredVectorWidthInt() const
1587 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1589 int Device::preferredVectorWidthLong() const
1590 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1592 int Device::preferredVectorWidthFloat() const
1593 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1595 int Device::preferredVectorWidthDouble() const
1596 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1598 int Device::preferredVectorWidthHalf() const
1599 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1601 size_t Device::printfBufferSize() const
1602 #ifdef CL_VERSION_1_2
1603 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1605 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1609 size_t Device::profilingTimerResolution() const
1610 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1612 const Device& Device::getDefault()
1614 const Context& ctx = Context::getDefault();
1615 int idx = getCoreTlsData().device;
1616 const Device& device = ctx.device(idx);
1620 ////////////////////////////////////// Context ///////////////////////////////////////////////////
1622 template <typename Functor, typename ObjectType>
1623 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
1626 cl_int err = f(obj, name, 0, NULL, &required);
1627 if (err != CL_SUCCESS)
1633 AutoBuffer<char> buf(required + 1);
1634 char* ptr = buf.data(); // cleanup is not needed
1635 err = f(obj, name, required, ptr, NULL);
1636 if (err != CL_SUCCESS)
1644 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
1649 std::istringstream ss(s);
1653 std::getline(ss, item, delim);
1654 elems.push_back(item);
1658 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
1660 // Sample: AMD:GPU:Tahiti
1661 // Sample: :GPU|CPU: = '' = ':' = '::'
1662 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
1663 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
1665 std::vector<std::string> parts;
1666 split(configurationStr, ':', parts);
1667 if (parts.size() > 3)
1669 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
1672 if (parts.size() > 2)
1673 deviceNameOrID = parts[2];
1674 if (parts.size() > 1)
1676 split(parts[1], '|', deviceTypes);
1678 if (parts.size() > 0)
1680 platform = parts[0];
1685 #if defined WINRT || defined _WIN32_WCE
1686 static cl_device_id selectOpenCLDevice()
1691 // std::tolower is int->int
1692 static char char_tolower(char ch)
1694 return (char)std::tolower((int)ch);
1696 static cl_device_id selectOpenCLDevice()
1698 std::string platform, deviceName;
1699 std::vector<std::string> deviceTypes;
1701 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
1702 if (configuration &&
1703 (strcmp(configuration, "disabled") == 0 ||
1704 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
1710 if (deviceName.length() == 1)
1711 // We limit ID range to 0..9, because we want to write:
1712 // - '2500' to mean i5-2500
1713 // - '8350' to mean AMD FX-8350
1714 // - '650' to mean GeForce 650
1715 // To extend ID range change condition to '> 0'
1718 for (size_t i = 0; i < deviceName.length(); i++)
1720 if (!isdigit(deviceName[i]))
1728 deviceID = atoi(deviceName.c_str());
1734 std::vector<cl_platform_id> platforms;
1736 cl_uint numPlatforms = 0;
1737 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
1739 if (numPlatforms == 0)
1741 platforms.resize((size_t)numPlatforms);
1742 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
1743 platforms.resize(numPlatforms);
1746 int selectedPlatform = -1;
1747 if (platform.length() > 0)
1749 for (size_t i = 0; i < platforms.size(); i++)
1752 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
1753 if (name.find(platform) != std::string::npos)
1755 selectedPlatform = (int)i;
1759 if (selectedPlatform == -1)
1761 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
1765 if (deviceTypes.size() == 0)
1769 deviceTypes.push_back("GPU");
1771 deviceTypes.push_back("CPU");
1774 deviceTypes.push_back("ALL");
1776 for (size_t t = 0; t < deviceTypes.size(); t++)
1779 std::string tempStrDeviceType = deviceTypes[t];
1780 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), char_tolower);
1782 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1783 deviceType = Device::TYPE_GPU;
1784 else if (tempStrDeviceType == "cpu")
1785 deviceType = Device::TYPE_CPU;
1786 else if (tempStrDeviceType == "accelerator")
1787 deviceType = Device::TYPE_ACCELERATOR;
1788 else if (tempStrDeviceType == "all")
1789 deviceType = Device::TYPE_ALL;
1792 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
1796 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
1797 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
1798 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
1802 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
1803 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1805 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
1809 size_t base = devices.size();
1810 devices.resize(base + count);
1811 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
1812 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1814 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
1818 for (size_t i = (isID ? deviceID : 0);
1819 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
1823 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
1824 cl_bool useGPU = true;
1825 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1827 cl_bool isIGPU = CL_FALSE;
1828 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
1829 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
1831 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
1833 // TODO check for OpenCL 1.1
1841 return NULL; // suppress messages on stderr
1843 std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << configuration << std::endl
1844 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
1845 << " Device types: ";
1846 for (size_t t = 0; t < deviceTypes.size(); t++)
1847 std::cerr << deviceTypes[t] << " ";
1849 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
1854 #ifdef HAVE_OPENCL_SVM
1857 enum AllocatorFlags { // don't use first 16 bits
1858 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
1859 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
1860 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
1861 OPENCL_SVM_BUFFER_MASK = 3 << 16,
1862 OPENCL_SVM_BUFFER_MAP = 4 << 16
1865 static bool checkForceSVMUmatUsage()
1867 static bool initialized = false;
1868 static bool force = false;
1871 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
1876 static bool checkDisableSVMUMatUsage()
1878 static bool initialized = false;
1879 static bool force = false;
1882 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
1887 static bool checkDisableSVM()
1889 static bool initialized = false;
1890 static bool force = false;
1893 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
1898 // see SVMCapabilities
1899 static unsigned int getSVMCapabilitiesMask()
1901 static bool initialized = false;
1902 static unsigned int mask = 0;
1905 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
1906 if (envValue == NULL)
1908 return ~0U; // all bits 1
1910 mask = atoi(envValue);
1918 static size_t getProgramCountLimit()
1920 static bool initialized = false;
1921 static size_t count = 0;
1924 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
1930 struct Context::Impl
1932 static Context::Impl* get(Context& context) { return context.p; }
1938 #ifdef HAVE_OPENCL_SVM
1939 svmInitialized = false;
1950 CV_Assert(handle == NULL);
1952 cl_device_id d = selectOpenCLDevice();
1957 cl_platform_id pl = NULL;
1958 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
1960 cl_context_properties prop[] =
1962 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1966 // !!! in the current implementation force the number of devices to 1 !!!
1970 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
1971 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
1973 bool ok = handle != 0 && status == CL_SUCCESS;
1988 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
1989 cl_context_properties prop[] =
1991 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1996 int dtype = dtype0 & 15;
1997 cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0);
1998 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
2000 CV_OCL_DBG_CHECK_RESULT(status,
2001 cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str());
2007 AutoBuffer<void*> dlistbuf(nd0*2+1);
2008 cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
2009 cl_device_id* dlist_new = dlist + nd0;
2010 CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
2014 for(i = 0; i < nd0; i++)
2017 if( !d.available() || !d.compilerAvailable() )
2019 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2021 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2023 String name = d.name();
2024 if( nd != 0 && name != name0 )
2027 dlist_new[nd++] = dlist[i];
2033 // !!! in the current implementation force the number of devices to 1 !!!
2036 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2037 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateContext");
2038 bool ok = handle != 0 && retval == CL_SUCCESS;
2042 for( i = 0; i < nd; i++ )
2043 devices[i].set(dlist_new[i]);
2051 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2057 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2059 void unloadProg(Program& prog)
2061 cv::AutoLock lock(program_cache_mutex);
2062 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2064 phash_t::iterator it = phash.find(*i);
2065 if (it != phash.end())
2067 if (it->second.ptr() == prog.ptr())
2077 std::string& getPrefixString()
2081 cv::AutoLock lock(program_cache_mutex);
2084 CV_Assert(!devices.empty());
2085 const Device& d = devices[0];
2086 int bits = d.addressBits();
2087 if (bits > 0 && bits != 64)
2088 prefix = cv::format("%d-bit--", bits);
2089 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2091 for (size_t i = 0; i < prefix.size(); i++)
2094 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2104 std::string& getPrefixBase()
2106 if (prefix_base.empty())
2108 cv::AutoLock lock(program_cache_mutex);
2109 if (prefix_base.empty())
2111 const Device& d = devices[0];
2112 int bits = d.addressBits();
2113 if (bits > 0 && bits != 64)
2114 prefix_base = cv::format("%d-bit--", bits);
2115 prefix_base += d.vendorName() + "--" + d.name() + "--";
2117 for (size_t i = 0; i < prefix_base.size(); i++)
2119 char c = prefix_base[i];
2120 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2122 prefix_base[i] = '_';
2130 IMPLEMENT_REFCOUNTABLE();
2133 std::vector<Device> devices;
2136 std::string prefix_base;
2138 cv::Mutex program_cache_mutex;
2139 typedef std::map<std::string, Program> phash_t;
2141 typedef std::list<cv::String> CacheList;
2142 CacheList cacheList;
2144 #ifdef HAVE_OPENCL_SVM
2145 bool svmInitialized;
2148 svm::SVMCapabilities svmCapabilities;
2149 svm::SVMFunctions svmFunctions;
2153 CV_Assert(handle != NULL);
2154 const Device& device = devices[0];
2155 cl_device_svm_capabilities deviceCaps = 0;
2156 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2157 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2158 if (status != CL_SUCCESS)
2160 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2163 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2164 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2165 svmCapabilities.value_ =
2166 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2167 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2168 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2169 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2170 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2171 if (svmCapabilities.value_ == 0)
2173 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2179 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2180 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2183 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2184 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2189 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2190 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2192 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2193 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2198 ((int*)ptr)[0] = 100;
2202 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2205 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2207 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2208 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2213 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2218 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2219 clSVMFree(handle, ptr);
2222 clSVMFree(handle, ptr);
2223 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2224 svmFunctions.fn_clSVMFree = clSVMFree;
2225 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2226 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2227 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2228 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2229 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2230 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2231 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2235 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2238 // Try HSA extension
2239 String extensions = device.extensions();
2240 if (extensions.find("cl_amd_svm") == String::npos)
2242 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2245 cl_platform_id p = NULL;
2246 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2247 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2248 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2249 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2250 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2251 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2252 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2253 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2254 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2255 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2256 CV_Assert(svmFunctions.isValid());
2260 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2265 svmAvailable = true;
2266 svmEnabled = !svm::checkDisableSVM();
2267 svmInitialized = true;
2268 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2271 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2272 svmAvailable = false;
2274 svmCapabilities.value_ = 0;
2275 svmInitialized = true;
2276 svmFunctions.fn_clSVMAlloc = NULL;
2281 friend class Program;
2290 Context::Context(int dtype)
2296 bool Context::create()
2311 bool Context::create(int dtype0)
2317 p = new Impl(dtype0);
2335 Context::Context(const Context& c)
2342 Context& Context::operator = (const Context& c)
2344 Impl* newp = (Impl*)c.p;
2353 void* Context::ptr() const
2355 return p == NULL ? NULL : p->handle;
2358 size_t Context::ndevices() const
2360 return p ? p->devices.size() : 0;
2363 const Device& Context::device(size_t idx) const
2365 static Device dummy;
2366 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2369 Context& Context::getDefault(bool initialize)
2371 static Context* ctx = new Context();
2372 if(!ctx->p && haveOpenCL())
2375 ctx->p = new Impl();
2378 // do not create new Context right away.
2379 // First, try to retrieve existing context of the same type.
2380 // In its turn, Platform::getContext() may call Context::create()
2381 // if there is no such context.
2382 if (ctx->p->handle == NULL)
2383 ctx->p->setDefault();
2390 Program Context::getProg(const ProgramSource& prog,
2391 const String& buildopts, String& errmsg)
2393 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2396 void Context::unloadProg(Program& prog)
2399 p->unloadProg(prog);
2402 #ifdef HAVE_OPENCL_SVM
2403 bool Context::useSVM() const
2405 Context::Impl* i = p;
2407 if (!i->svmInitialized)
2409 return i->svmEnabled;
2411 void Context::setUseSVM(bool enabled)
2413 Context::Impl* i = p;
2415 if (!i->svmInitialized)
2417 if (enabled && !i->svmAvailable)
2419 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
2421 i->svmEnabled = enabled;
2424 bool Context::useSVM() const { return false; }
2425 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
2428 #ifdef HAVE_OPENCL_SVM
2431 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
2433 Context::Impl* i = context.p;
2435 if (!i->svmInitialized)
2437 return i->svmCapabilities;
2440 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
2442 Context::Impl* i = context.p;
2444 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
2445 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
2446 return &i->svmFunctions;
2449 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
2451 if (checkForceSVMUmatUsage())
2453 if (checkDisableSVMUMatUsage())
2455 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
2457 return false; // don't use SVM by default
2460 } // namespace cv::ocl::svm
2461 #endif // HAVE_OPENCL_SVM
2464 static void get_platform_name(cl_platform_id id, String& name)
2466 // get platform name string length
2468 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
2470 // get platform name string
2471 AutoBuffer<char> buf(sz + 1);
2472 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
2474 // just in case, ensure trailing zero for ASCIIZ string
2481 // Attaches OpenCL context to OpenCV
2483 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
2487 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
2490 CV_Error(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
2492 std::vector<cl_platform_id> platforms(cnt);
2494 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
2496 bool platformAvailable = false;
2498 // check if external platformName contained in list of available platforms in OpenCV
2499 for (unsigned int i = 0; i < cnt; i++)
2501 String availablePlatformName;
2502 get_platform_name(platforms[i], availablePlatformName);
2503 // external platform is found in the list of available platforms
2504 if (platformName == availablePlatformName)
2506 platformAvailable = true;
2511 if (!platformAvailable)
2512 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2514 // check if platformID corresponds to platformName
2515 String actualPlatformName;
2516 get_platform_name((cl_platform_id)platformID, actualPlatformName);
2517 if (platformName != actualPlatformName)
2518 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2520 // do not initialize OpenCL context
2521 Context ctx = Context::getDefault(false);
2523 // attach supplied context to OpenCV
2524 initializeContextFromHandle(ctx, platformID, context, deviceID);
2526 CV_OCL_CHECK(clRetainContext((cl_context)context));
2528 // clear command queue, if any
2529 CoreTLSData& data = getCoreTlsData();
2530 data.oclQueue.finish();
2535 } // attachContext()
2538 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2540 cl_context context = (cl_context)_context;
2541 cl_device_id device = (cl_device_id)_device;
2543 // cleanup old context
2544 Context::Impl * impl = ctx.p;
2547 CV_OCL_DBG_CHECK(clReleaseContext(impl->handle));
2549 impl->devices.clear();
2551 impl->handle = context;
2552 impl->devices.resize(1);
2553 impl->devices[0].set(device);
2555 Platform& p = Platform::getDefault();
2556 Platform::Impl* pImpl = p.p;
2557 pImpl->handle = (cl_platform_id)platform;
2560 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2564 inline void __init()
2568 isProfilingQueue_ = false;
2571 Impl(cl_command_queue q)
2576 cl_command_queue_properties props = 0;
2577 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
2578 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
2581 Impl(cl_command_queue q, bool isProfilingQueue)
2585 isProfilingQueue_ = isProfilingQueue;
2588 Impl(const Context& c, const Device& d, bool withProfiling = false)
2592 const Context* pc = &c;
2593 cl_context ch = (cl_context)pc->ptr();
2596 pc = &Context::getDefault();
2597 ch = (cl_context)pc->ptr();
2599 cl_device_id dh = (cl_device_id)d.ptr();
2601 dh = (cl_device_id)pc->device(0).ptr();
2603 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
2604 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
2605 isProfilingQueue_ = withProfiling;
2611 if (!cv::__termination)
2616 CV_OCL_DBG_CHECK(clFinish(handle));
2617 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
2623 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
2625 if (isProfilingQueue_)
2628 if (profiling_queue_.ptr())
2629 return profiling_queue_;
2632 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
2634 cl_device_id device = 0;
2635 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
2637 cl_int result = CL_SUCCESS;
2638 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
2639 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
2640 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
2643 queue.p = new Impl(q, true);
2644 profiling_queue_ = queue;
2646 return profiling_queue_;
2649 IMPLEMENT_REFCOUNTABLE();
2651 cl_command_queue handle;
2652 bool isProfilingQueue_;
2653 cv::ocl::Queue profiling_queue_;
2661 Queue::Queue(const Context& c, const Device& d)
2667 Queue::Queue(const Queue& q)
2674 Queue& Queue::operator = (const Queue& q)
2676 Impl* newp = (Impl*)q.p;
2691 bool Queue::create(const Context& c, const Device& d)
2696 return p->handle != 0;
2699 void Queue::finish()
2703 CV_OCL_DBG_CHECK(clFinish(p->handle));
2707 const Queue& Queue::getProfilingQueue() const
2710 return p->getProfilingQueue(*this);
2713 void* Queue::ptr() const
2715 return p ? p->handle : 0;
2718 Queue& Queue::getDefault()
2720 Queue& q = getCoreTlsData().oclQueue;
2721 if( !q.p && haveOpenCL() )
2722 q.create(Context::getDefault());
2726 static cl_command_queue getQueue(const Queue& q)
2728 cl_command_queue qq = (cl_command_queue)q.ptr();
2730 qq = (cl_command_queue)Queue::getDefault().ptr();
2734 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2736 KernelArg::KernelArg()
2737 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2741 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2742 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2744 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
2747 KernelArg KernelArg::Constant(const Mat& m)
2749 CV_Assert(m.isContinuous());
2750 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
2753 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2757 Impl(const char* kname, const Program& prog) :
2758 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
2760 cl_program ph = (cl_program)prog.ptr();
2765 handle = clCreateKernel(ph, kname, &retval);
2766 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
2768 for( int i = 0; i < MAX_ARRS; i++ )
2770 haveTempDstUMats = false;
2771 haveTempSrcUMats = false;
2776 for( int i = 0; i < MAX_ARRS; i++ )
2779 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2781 u[i]->flags |= UMatData::ASYNC_CLEANUP;
2782 u[i]->currAllocator->deallocate(u[i]);
2787 haveTempDstUMats = false;
2788 haveTempSrcUMats = false;
2791 void addUMat(const UMat& m, bool dst)
2793 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2795 CV_XADD(&m.u->urefcount, 1);
2797 if(dst && m.u->tempUMat())
2798 haveTempDstUMats = true;
2799 if(m.u->originalUMatData == NULL && m.u->tempUMat())
2800 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
2803 void addImage(const Image2D& image)
2805 images.push_back(image);
2808 void finit(cl_event e)
2813 isInProgress = false;
2817 bool run(int dims, size_t _globalsize[], size_t _localsize[],
2818 bool sync, int64* timeNS, const Queue& q);
2824 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
2828 IMPLEMENT_REFCOUNTABLE();
2832 enum { MAX_ARRS = 16 };
2833 UMatData* u[MAX_ARRS];
2835 bool isAsyncRun; // true if kernel was scheduled in async mode
2837 std::list<Image2D> images;
2838 bool haveTempDstUMats;
2839 bool haveTempSrcUMats;
2842 }} // namespace cv::ocl
2846 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2850 ((cv::ocl::Kernel::Impl*)p)->finit(e);
2852 catch (const cv::Exception& exc)
2854 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
2856 catch (const std::exception& exc)
2858 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
2862 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
2868 namespace cv { namespace ocl {
2875 Kernel::Kernel(const char* kname, const Program& prog)
2878 create(kname, prog);
2881 Kernel::Kernel(const char* kname, const ProgramSource& src,
2882 const String& buildopts, String* errmsg)
2885 create(kname, src, buildopts, errmsg);
2888 Kernel::Kernel(const Kernel& k)
2895 Kernel& Kernel::operator = (const Kernel& k)
2897 Impl* newp = (Impl*)k.p;
2912 bool Kernel::create(const char* kname, const Program& prog)
2916 p = new Impl(kname, prog);
2922 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
2928 bool Kernel::create(const char* kname, const ProgramSource& src,
2929 const String& buildopts, String* errmsg)
2937 if( !errmsg ) errmsg = &tempmsg;
2938 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2939 return create(kname, prog);
2942 void* Kernel::ptr() const
2944 return p ? p->handle : 0;
2947 bool Kernel::empty() const
2952 int Kernel::set(int i, const void* value, size_t sz)
2954 if (!p || !p->handle)
2961 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2962 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str());
2963 if (retval != CL_SUCCESS)
2968 int Kernel::set(int i, const Image2D& image2D)
2970 p->addImage(image2D);
2971 cl_mem h = (cl_mem)image2D.ptr();
2972 return set(i, &h, sizeof(h));
2975 int Kernel::set(int i, const UMat& m)
2977 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
2980 int Kernel::set(int i, const KernelArg& arg)
2982 if( !p || !p->handle )
2986 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
2987 p->name.c_str(), (int)i));
2995 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2996 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2997 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2998 if (ptronly && arg.m->empty())
3000 cl_mem h_null = (cl_mem)NULL;
3001 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3002 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3005 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3009 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3010 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3016 #ifdef HAVE_OPENCL_SVM
3017 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3019 const Context& ctx = Context::getDefault();
3020 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3021 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3022 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3024 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3026 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3028 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3033 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3034 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3041 else if( arg.m->dims <= 2 )
3044 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3045 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3046 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3047 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3050 if( !(arg.flags & KernelArg::NO_SIZE) )
3052 int cols = u2d.cols*arg.wscale/arg.iwscale;
3053 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3054 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3055 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3056 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3063 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3064 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3065 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3066 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3067 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3068 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3070 if( !(arg.flags & KernelArg::NO_SIZE) )
3072 int cols = u3d.cols*arg.wscale/arg.iwscale;
3073 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3074 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3075 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3076 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3077 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3078 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3082 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
3085 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3086 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3090 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3091 bool sync, const Queue& q)
3096 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3098 CV_Assert(_globalsize != NULL);
3099 for (int i = 0; i < dims; i++)
3101 size_t val = _localsize ? _localsize[i] :
3102 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3103 CV_Assert( val > 0 );
3104 total *= _globalsize[i];
3105 if (_globalsize[i] == 1 && !_localsize)
3107 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3109 CV_Assert(total > 0);
3111 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3115 static bool isRaiseErrorOnReuseAsyncKernel()
3117 static bool initialized = false;
3118 static bool value = false;
3121 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3127 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3128 bool sync, int64* timeNS, const Queue& q)
3130 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3134 CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3140 CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3141 if (isRaiseErrorOnReuseAsyncKernel())
3143 return false; // OpenCV 5.0: raise error
3149 CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3150 if (isRaiseErrorOnReuseAsyncKernel())
3152 return false; // OpenCV 5.0: raise error
3155 cl_command_queue qq = getQueue(q);
3156 if (haveTempDstUMats)
3158 if (haveTempSrcUMats)
3162 cl_event asyncEvent = 0;
3163 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3164 NULL, globalsize, localsize, 0, 0,
3165 (sync && !timeNS) ? 0 : &asyncEvent);
3166 #if !CV_OPENCL_SHOW_RUN_KERNELS
3167 if (retval != CL_SUCCESS)
3170 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%dx%dx%d, localsize=%s) sync=%s", name.c_str(), (int)dims,
3171 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3172 (localsize ? cv::format("%dx%dx%d", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3173 sync ? "true" : "false"
3175 if (retval != CL_SUCCESS)
3177 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3179 #if CV_OPENCL_TRACE_CHECK
3180 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3182 printf("%s\n", msg.c_str());
3186 if (sync || retval != CL_SUCCESS)
3188 CV_OCL_DBG_CHECK(clFinish(qq));
3191 if (retval == CL_SUCCESS)
3193 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3194 cl_ulong startTime, stopTime;
3195 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3196 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3197 *timeNS = (int64)(stopTime - startTime);
3209 isInProgress = true;
3210 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3213 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3214 return retval == CL_SUCCESS;
3217 bool Kernel::runTask(bool sync, const Queue& q)
3219 if(!p || !p->handle || p->isInProgress)
3222 cl_command_queue qq = getQueue(q);
3223 cl_event asyncEvent = 0;
3224 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3225 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3226 if (sync || retval != CL_SUCCESS)
3228 CV_OCL_DBG_CHECK(clFinish(qq));
3234 p->isInProgress = true;
3235 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3238 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3239 return retval == CL_SUCCESS;
3242 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3244 CV_Assert(p && p->handle && !p->isInProgress);
3245 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3247 q.finish(); // call clFinish() on base queue
3248 Queue profilingQueue = q.getProfilingQueue();
3250 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3251 return res ? timeNs : -1;
3254 size_t Kernel::workGroupSize() const
3256 if(!p || !p->handle)
3258 size_t val = 0, retsz = 0;
3259 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3260 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3261 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3262 return status == CL_SUCCESS ? val : 0;
3265 size_t Kernel::preferedWorkGroupSizeMultiple() const
3267 if(!p || !p->handle)
3269 size_t val = 0, retsz = 0;
3270 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3271 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3272 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3273 return status == CL_SUCCESS ? val : 0;
3276 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3278 if(!p || !p->handle || !wsz)
3281 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3282 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3283 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3284 return status == CL_SUCCESS;
3287 size_t Kernel::localMemSize() const
3289 if(!p || !p->handle)
3293 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3294 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3295 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3296 return status == CL_SUCCESS ? (size_t)val : 0;
3301 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3303 struct ProgramSource::Impl
3305 IMPLEMENT_REFCOUNTABLE();
3308 PROGRAM_SOURCE_CODE = 0,
3314 Impl(const String& src)
3316 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3317 initFromSource(src, cv::String());
3319 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3321 init(PROGRAM_SOURCE_CODE, module, name);
3322 initFromSource(codeStr, codeHash);
3326 void init(enum KIND kind, const String& module, const String& name)
3335 isHashUpdated = false;
3338 void initFromSource(const String& codeStr, const String& codeHash)
3341 sourceHash_ = codeHash;
3342 if (sourceHash_.empty())
3348 isHashUpdated = true;
3352 void updateHash(const char* hashStr = NULL)
3356 sourceHash_ = cv::String(hashStr);
3357 isHashUpdated = true;
3363 case PROGRAM_SOURCE_CODE:
3366 CV_Assert(codeStr_.empty());
3367 hash = crc64(sourceAddr_, sourceSize_); // static storage
3371 CV_Assert(!codeStr_.empty());
3372 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3375 case PROGRAM_BINARIES:
3378 hash = crc64(sourceAddr_, sourceSize_);
3381 CV_Error(Error::StsInternal, "Internal error");
3383 sourceHash_ = cv::format("%08llx", hash);
3384 isHashUpdated = true;
3387 Impl(enum KIND kind,
3388 const String& module, const String& name,
3389 const unsigned char* binary, const size_t size,
3390 const cv::String& buildOptions = cv::String())
3392 init(kind, module, name);
3394 sourceAddr_ = binary;
3397 buildOptions_ = buildOptions;
3400 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3401 const char* sourceCodeStaticStr, const char* hashStaticStr,
3402 const cv::String& buildOptions)
3404 ProgramSource result;
3405 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3406 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3407 result.p->updateHash(hashStaticStr);
3411 static ProgramSource fromBinary(const String& module, const String& name,
3412 const unsigned char* binary, const size_t size,
3413 const cv::String& buildOptions)
3415 ProgramSource result;
3416 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
3420 static ProgramSource fromSPIR(const String& module, const String& name,
3421 const unsigned char* binary, const size_t size,
3422 const cv::String& buildOptions)
3424 ProgramSource result;
3425 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
3432 // TODO std::vector<ProgramSource> includes_;
3433 String codeStr_; // PROGRAM_SOURCE_CODE only
3435 const unsigned char* sourceAddr_;
3438 cv::String buildOptions_;
3443 friend struct Program::Impl;
3444 friend struct internal::ProgramEntry;
3445 friend struct Context::Impl;
3449 ProgramSource::ProgramSource()
3454 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
3456 p = new Impl(module, name, codeStr, codeHash);
3459 ProgramSource::ProgramSource(const char* prog)
3464 ProgramSource::ProgramSource(const String& prog)
3469 ProgramSource::~ProgramSource()
3475 ProgramSource::ProgramSource(const ProgramSource& prog)
3482 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3484 Impl* newp = (Impl*)prog.p;
3493 const String& ProgramSource::source() const
3496 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
3497 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
3501 ProgramSource::hash_t ProgramSource::hash() const
3503 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
3506 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
3507 const unsigned char* binary, const size_t size,
3508 const cv::String& buildOptions)
3511 CV_Assert(size > 0);
3512 return Impl::fromBinary(module, name, binary, size, buildOptions);
3515 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
3516 const unsigned char* binary, const size_t size,
3517 const cv::String& buildOptions)
3520 CV_Assert(size > 0);
3521 return Impl::fromBinary(module, name, binary, size, buildOptions);
3525 internal::ProgramEntry::operator ProgramSource&() const
3527 if (this->pProgramSource == NULL)
3529 cv::AutoLock lock(cv::getInitializationMutex());
3530 if (this->pProgramSource == NULL)
3532 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
3533 ProgramSource* ptr = new ProgramSource(ps);
3534 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
3537 return *this->pProgramSource;
3542 /////////////////////////////////////////// Program /////////////////////////////////////////////
3545 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
3553 return a + (cv::String(" ") + b);
3556 struct Program::Impl
3558 IMPLEMENT_REFCOUNTABLE();
3560 Impl(const ProgramSource& src,
3561 const String& _buildflags, String& errmsg) :
3564 buildflags(_buildflags)
3566 const ProgramSource::Impl* src_ = src.getImpl();
3568 sourceModule_ = src_->module_;
3569 sourceName_ = src_->name_;
3570 const Context ctx = Context::getDefault();
3571 Device device = ctx.device(0);
3572 if (ctx.ptr() == NULL || device.ptr() == NULL)
3574 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
3575 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3578 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
3579 else if (device.isIntel())
3580 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
3581 const String param_buildExtraOptions = getBuildExtraOptions();
3582 if (!param_buildExtraOptions.empty())
3583 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
3585 compile(ctx, src_, errmsg);
3588 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3590 CV_Assert(ctx.getImpl());
3593 // We don't cache OpenCL binaries
3594 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
3596 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3597 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3600 return compileWithCache(ctx, src_, errmsg);
3603 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3605 CV_Assert(ctx.getImpl());
3607 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
3609 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3610 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
3611 const std::string base_dir = config.prepareCacheDirectoryForContext(
3612 ctx.getImpl()->getPrefixString(),
3613 ctx.getImpl()->getPrefixBase()
3615 const String& hash_str = src_->sourceHash_;
3617 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
3619 CV_Assert(!hash_str.empty());
3620 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
3621 fname = utils::fs::join(base_dir, fname);
3623 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
3624 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
3628 std::vector<char> binaryBuf;
3631 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3632 BinaryProgramFile file(fname, hash_str.c_str());
3633 res = file.read(buildflags, binaryBuf);
3637 CV_Assert(!binaryBuf.empty());
3638 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
3639 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
3644 catch (const cv::Exception& e)
3647 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
3651 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
3654 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3655 CV_Assert(handle == NULL);
3656 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3658 if (!buildFromSources(ctx, src_, errmsg))
3663 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
3665 buildflags = joinBuildOptions(buildflags, " -x spir");
3666 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
3668 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
3670 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3671 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3675 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
3677 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
3681 CV_Error(Error::StsInternal, "Internal error");
3683 CV_Assert(handle != NULL);
3684 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3685 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
3689 std::vector<char> binaryBuf;
3690 getProgramBinary(binaryBuf);
3692 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3693 BinaryProgramFile file(fname, hash_str.c_str());
3694 file.write(buildflags, binaryBuf);
3697 catch (const cv::Exception& e)
3699 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
3703 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
3706 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3707 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3708 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3710 std::vector<char> binaryBuf;
3711 getProgramBinary(binaryBuf);
3712 if (!binaryBuf.empty())
3714 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3716 createFromBinary(ctx, binaryBuf, errmsg);
3720 return handle != NULL;
3723 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
3725 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
3728 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3729 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3730 if (log_retval == CL_SUCCESS && retsz > 1)
3732 buffer.resize(retsz + 16);
3733 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3734 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
3735 if (log_retval == CL_SUCCESS)
3737 if (retsz < buffer.size())
3740 buffer[buffer.size() - 1] = 0;
3748 errmsg = String(buffer.data());
3749 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
3750 sourceModule_.c_str(), sourceName_.c_str(),
3751 result, getOpenCLErrorString(result),
3752 buildflags.c_str(), errmsg.c_str());
3756 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3759 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
3760 CV_Assert(handle == NULL);
3761 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
3762 sourceModule_.c_str(), sourceName_.c_str(),
3763 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
3765 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
3767 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
3768 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
3769 CV_Assert(srcptr != NULL);
3770 CV_Assert(srclen > 0);
3774 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3775 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
3776 CV_Assert(handle || retval != CL_SUCCESS);
3777 if (handle && retval == CL_SUCCESS)
3779 size_t n = ctx.ndevices();
3780 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
3781 cl_device_id* deviceList = deviceListBuf.data();
3782 for (size_t i = 0; i < n; i++)
3784 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
3787 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
3788 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
3789 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3790 if (retval != CL_SUCCESS)
3793 dumpBuildLog_(retval, deviceList, errmsg);
3795 // don't remove "retval != CL_SUCCESS" condition here:
3796 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
3797 if (retval != CL_SUCCESS && handle)
3799 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3803 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3804 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3806 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
3808 char kernels_buffer[4096] = {0};
3809 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3810 if (retsz < sizeof(kernels_buffer))
3811 kernels_buffer[retsz] = 0;
3813 kernels_buffer[0] = 0;
3814 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3819 return handle != NULL;
3822 void getProgramBinary(std::vector<char>& buf)
3826 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
3828 uchar* ptr = (uchar*)&buf[0];
3829 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
3832 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
3834 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
3837 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
3839 CV_Assert(handle == NULL);
3840 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
3841 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
3843 CV_Assert(binarySize > 0);
3845 size_t ndevices = (int)ctx.ndevices();
3846 AutoBuffer<cl_device_id> devices_(ndevices);
3847 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
3848 AutoBuffer<size_t> binarySizes_(ndevices);
3850 cl_device_id* devices = devices_.data();
3851 const uchar** binaryPtrs = binaryPtrs_.data();
3852 size_t* binarySizes = binarySizes_.data();
3853 for (size_t i = 0; i < ndevices; i++)
3855 devices[i] = (cl_device_id)ctx.device(i).ptr();
3856 binaryPtrs[i] = binaryAddr;
3857 binarySizes[i] = binarySize;
3861 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
3862 binarySizes, binaryPtrs, NULL, &result);
3863 if (result != CL_SUCCESS)
3865 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
3868 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3876 // call clBuildProgram()
3878 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
3879 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
3880 if (result != CL_SUCCESS)
3882 dumpBuildLog_(result, devices, errmsg);
3885 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3891 // check build status
3893 cl_build_status build_status = CL_BUILD_NONE;
3895 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
3896 sizeof(build_status), &build_status, &retsz));
3897 if (result == CL_SUCCESS)
3899 if (build_status == CL_BUILD_SUCCESS)
3905 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
3911 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
3914 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3919 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3920 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3922 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
3924 char kernels_buffer[4096] = {0};
3925 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3926 if (retsz < sizeof(kernels_buffer))
3927 kernels_buffer[retsz] = 0;
3929 kernels_buffer[0] = 0;
3930 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3933 return handle != NULL;
3941 if (!cv::__termination)
3944 clReleaseProgram(handle);
3953 String sourceModule_;
3958 Program::Program() { p = 0; }
3960 Program::Program(const ProgramSource& src,
3961 const String& buildflags, String& errmsg)
3964 create(src, buildflags, errmsg);
3967 Program::Program(const Program& prog)
3974 Program& Program::operator = (const Program& prog)
3976 Impl* newp = (Impl*)prog.p;
3991 bool Program::create(const ProgramSource& src,
3992 const String& buildflags, String& errmsg)
3999 p = new Impl(src, buildflags, errmsg);
4008 void* Program::ptr() const
4010 return p ? p->handle : 0;
4013 #ifndef OPENCV_REMOVE_DEPRECATED_API
4014 const ProgramSource& Program::source() const
4016 CV_Error(Error::StsNotImplemented, "Removed API");
4019 bool Program::read(const String& bin, const String& buildflags)
4021 CV_UNUSED(bin); CV_UNUSED(buildflags);
4022 CV_Error(Error::StsNotImplemented, "Removed API");
4025 bool Program::write(String& bin) const
4028 CV_Error(Error::StsNotImplemented, "Removed API");
4031 String Program::getPrefix() const
4035 Context::Impl* ctx_ = Context::getDefault().getImpl();
4037 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4040 String Program::getPrefix(const String& buildflags)
4042 Context::Impl* ctx_ = Context::getDefault().getImpl();
4044 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4046 #endif // OPENCV_REMOVE_DEPRECATED_API
4048 void Program::getBinary(std::vector<char>& binary) const
4050 CV_Assert(p && "Empty program");
4051 p->getProgramBinary(binary);
4054 Program Context::Impl::getProg(const ProgramSource& src,
4055 const String& buildflags, String& errmsg)
4057 size_t limit = getProgramCountLimit();
4058 const ProgramSource::Impl* src_ = src.getImpl();
4060 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4061 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4062 getPrefixString().c_str(),
4063 buildflags.c_str());
4065 cv::AutoLock lock(program_cache_mutex);
4066 phash_t::iterator it = phash.find(key);
4067 if (it != phash.end())
4070 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4071 if (i != cacheList.end() && i != cacheList.begin())
4074 cacheList.push_front(key);
4078 { // cleanup program cache
4079 size_t sz = phash.size();
4080 if (limit > 0 && sz >= limit)
4082 static bool warningFlag = false;
4085 printf("\nWARNING: OpenCV-OpenCL:\n"
4086 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4087 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4090 while (!cacheList.empty())
4092 size_t c = phash.erase(cacheList.back());
4093 cacheList.pop_back();
4100 Program prog(src, buildflags, errmsg);
4101 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4103 cv::AutoLock lock(program_cache_mutex);
4104 phash.insert(std::pair<std::string, Program>(key, prog));
4105 cacheList.push_front(key);
4111 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4113 template<typename T>
4114 class OpenCLBufferPool
4117 ~OpenCLBufferPool() { }
4119 virtual T allocate(size_t size) = 0;
4120 virtual void release(T buffer) = 0;
4123 template <typename Derived, typename BufferEntry, typename T>
4124 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4127 inline Derived& derived() { return *static_cast<Derived*>(this); }
4131 size_t currentReservedSize;
4132 size_t maxReservedSize;
4134 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4135 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4138 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4140 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4141 for (; i != allocatedEntries_.end(); ++i)
4143 BufferEntry& e = *i;
4144 if (e.clBuffer_ == buffer)
4147 allocatedEntries_.erase(i);
4155 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4157 if (reservedEntries_.empty())
4159 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4160 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4162 size_t minDiff = (size_t)(-1);
4163 for (; i != reservedEntries_.end(); ++i)
4165 BufferEntry& e = *i;
4166 if (e.capacity_ >= size)
4168 size_t diff = e.capacity_ - size;
4169 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4179 if (result_pos != reservedEntries_.end())
4181 //CV_DbgAssert(result == *result_pos);
4182 reservedEntries_.erase(result_pos);
4184 currentReservedSize -= entry.capacity_;
4185 allocatedEntries_.push_back(entry);
4192 void _checkSizeOfReservedEntries()
4194 while (currentReservedSize > maxReservedSize)
4196 CV_DbgAssert(!reservedEntries_.empty());
4197 const BufferEntry& entry = reservedEntries_.back();
4198 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4199 currentReservedSize -= entry.capacity_;
4200 derived()._releaseBufferEntry(entry);
4201 reservedEntries_.pop_back();
4205 inline size_t _allocationGranularity(size_t size)
4208 if (size < 1024*1024)
4209 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4210 else if (size < 16*1024*1024)
4217 OpenCLBufferPoolBaseImpl()
4218 : currentReservedSize(0),
4223 virtual ~OpenCLBufferPoolBaseImpl()
4225 freeAllReservedBuffers();
4226 CV_Assert(reservedEntries_.empty());
4229 virtual T allocate(size_t size) CV_OVERRIDE
4231 AutoLock locker(mutex_);
4233 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4235 CV_DbgAssert(size <= entry.capacity_);
4236 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4240 derived()._allocateBufferEntry(entry, size);
4242 return entry.clBuffer_;
4244 virtual void release(T buffer) CV_OVERRIDE
4246 AutoLock locker(mutex_);
4248 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4249 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4251 derived()._releaseBufferEntry(entry);
4255 reservedEntries_.push_front(entry);
4256 currentReservedSize += entry.capacity_;
4257 _checkSizeOfReservedEntries();
4261 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4262 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4263 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4265 AutoLock locker(mutex_);
4266 size_t oldMaxReservedSize = maxReservedSize;
4267 maxReservedSize = size;
4268 if (maxReservedSize < oldMaxReservedSize)
4270 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4271 for (; i != reservedEntries_.end();)
4273 const BufferEntry& entry = *i;
4274 if (entry.capacity_ > maxReservedSize / 8)
4276 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4277 currentReservedSize -= entry.capacity_;
4278 derived()._releaseBufferEntry(entry);
4279 i = reservedEntries_.erase(i);
4284 _checkSizeOfReservedEntries();
4287 virtual void freeAllReservedBuffers() CV_OVERRIDE
4289 AutoLock locker(mutex_);
4290 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4291 for (; i != reservedEntries_.end(); ++i)
4293 const BufferEntry& entry = *i;
4294 derived()._releaseBufferEntry(entry);
4296 reservedEntries_.clear();
4297 currentReservedSize = 0;
4301 struct CLBufferEntry
4305 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4308 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4311 typedef struct CLBufferEntry BufferEntry;
4315 OpenCLBufferPoolImpl(int createFlags = 0)
4316 : createFlags_(createFlags)
4320 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4322 CV_DbgAssert(entry.clBuffer_ == NULL);
4323 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4324 Context& ctx = Context::getDefault();
4325 cl_int retval = CL_SUCCESS;
4326 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4327 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4328 CV_Assert(entry.clBuffer_ != NULL);
4329 if(retval == CL_SUCCESS)
4331 CV_IMPL_ADD(CV_IMPL_OCL);
4333 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4334 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4335 allocatedEntries_.push_back(entry);
4338 void _releaseBufferEntry(const BufferEntry& entry)
4340 CV_Assert(entry.capacity_ != 0);
4341 CV_Assert(entry.clBuffer_ != NULL);
4342 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4343 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4344 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4348 #ifdef HAVE_OPENCL_SVM
4349 struct CLSVMBufferEntry
4353 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4355 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4358 typedef struct CLSVMBufferEntry BufferEntry;
4360 OpenCLSVMBufferPoolImpl()
4364 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4366 CV_DbgAssert(entry.clBuffer_ == NULL);
4367 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4369 Context& ctx = Context::getDefault();
4370 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4371 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4372 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4373 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4375 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4376 CV_DbgAssert(svmFns->isValid());
4378 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4379 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4382 entry.clBuffer_ = buf;
4384 CV_IMPL_ADD(CV_IMPL_OCL);
4386 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4387 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4388 allocatedEntries_.push_back(entry);
4391 void _releaseBufferEntry(const BufferEntry& entry)
4393 CV_Assert(entry.capacity_ != 0);
4394 CV_Assert(entry.clBuffer_ != NULL);
4395 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4396 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4397 Context& ctx = Context::getDefault();
4398 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4399 CV_DbgAssert(svmFns->isValid());
4400 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4401 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4408 template <bool readAccess, bool writeAccess>
4409 class AlignedDataPtr
4413 uchar* const originPtr_;
4414 const size_t alignment_;
4416 uchar* allocatedPtr_;
4419 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
4420 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
4422 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4423 CV_DbgAssert(!readAccess || ptr);
4424 if (((size_t)ptr_ & (alignment - 1)) != 0)
4426 allocatedPtr_ = new uchar[size_ + alignment - 1];
4427 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4430 memcpy(ptr_, originPtr_, size_);
4435 uchar* getAlignedPtr() const
4437 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4447 memcpy(originPtr_, ptr_, size_);
4449 delete[] allocatedPtr_;
4450 allocatedPtr_ = NULL;
4455 AlignedDataPtr(const AlignedDataPtr&); // disabled
4456 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
4459 template <bool readAccess, bool writeAccess>
4460 class AlignedDataPtr2D
4464 uchar* const originPtr_;
4465 const size_t alignment_;
4467 uchar* allocatedPtr_;
4473 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
4474 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
4476 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4477 CV_DbgAssert(!readAccess || ptr != NULL);
4478 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
4480 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
4481 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4484 for (size_t i = 0; i < rows_; i++)
4485 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
4490 uchar* getAlignedPtr() const
4492 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4502 for (size_t i = 0; i < rows_; i++)
4503 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
4505 delete[] allocatedPtr_;
4506 allocatedPtr_ = NULL;
4511 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
4512 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
4515 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
4516 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
4519 class OpenCLAllocator CV_FINAL : public MatAllocator
4521 mutable OpenCLBufferPoolImpl bufferPool;
4522 mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
4523 #ifdef HAVE_OPENCL_SVM
4524 mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
4530 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
4531 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
4532 #ifdef HAVE_OPENCL_SVM
4533 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
4535 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
4540 bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
4542 size_t defaultPoolSize, poolSize;
4543 defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
4544 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
4545 bufferPool.setMaxReservedSize(poolSize);
4546 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
4547 bufferPoolHostPtr.setMaxReservedSize(poolSize);
4548 #ifdef HAVE_OPENCL_SVM
4549 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
4550 bufferPoolSVM.setMaxReservedSize(poolSize);
4553 matStdAllocator = Mat::getDefaultAllocator();
4557 flushCleanupQueue();
4560 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
4561 int flags, UMatUsageFlags usageFlags) const
4563 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
4567 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
4569 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
4572 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
4574 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
4578 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
4580 const Device& dev = ctx.device(0);
4582 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
4583 createFlags |= CL_MEM_ALLOC_HOST_PTR;
4585 if (!isOpenCLCopyingForced() &&
4586 (isOpenCLMapForced() ||
4587 (dev.hostUnifiedMemory()
4596 flags0 = UMatData::COPY_ON_MAP;
4599 UMatData* allocate(int dims, const int* sizes, int type,
4600 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4603 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4604 CV_Assert(data == 0);
4605 size_t total = CV_ELEM_SIZE(type);
4606 for( int i = dims-1; i >= 0; i-- )
4613 Context& ctx = Context::getDefault();
4614 flushCleanupQueue();
4616 int createFlags = 0, flags0 = 0;
4617 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
4619 void* handle = NULL;
4620 int allocatorFlags = 0;
4622 #ifdef HAVE_OPENCL_SVM
4623 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4624 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
4626 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
4627 handle = bufferPoolSVM.allocate(total);
4629 // this property is constant, so single buffer pool can be used here
4630 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4631 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4635 if (createFlags == 0)
4637 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
4638 handle = bufferPool.allocate(total);
4640 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
4642 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
4643 handle = bufferPoolHostPtr.allocate(total);
4647 CV_Assert(handle != NULL); // Unsupported, throw
4651 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4653 UMatData* u = new UMatData(this);
4658 u->allocatorFlags_ = allocatorFlags;
4659 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
4660 u->markHostCopyObsolete(true);
4661 opencl_allocator_stats.onAllocate(u->size);
4665 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4670 flushCleanupQueue();
4672 UMatDataAutoLock lock(u);
4676 CV_Assert(u->origdata != 0);
4677 Context& ctx = Context::getDefault();
4678 int createFlags = 0, flags0 = 0;
4679 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
4681 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
4683 cl_context ctx_handle = (cl_context)ctx.ptr();
4684 int allocatorFlags = 0;
4685 int tempUMatFlags = 0;
4686 void* handle = NULL;
4687 cl_int retval = CL_SUCCESS;
4689 #ifdef HAVE_OPENCL_SVM
4690 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4691 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
4692 if (useSVM && svmCaps.isSupportFineGrainSystem())
4694 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
4695 tempUMatFlags = UMatData::TEMP_UMAT;
4696 handle = u->origdata;
4697 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
4699 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
4701 if (!(accessFlags & ACCESS_FAST)) // memcpy used
4703 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4705 cl_svm_mem_flags memFlags = createFlags |
4706 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4708 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4709 CV_DbgAssert(svmFns->isValid());
4711 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
4712 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
4715 cl_command_queue q = NULL;
4716 if (!isFineGrainBuffer)
4718 q = (cl_command_queue)Queue::getDefault().ptr();
4719 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
4720 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
4723 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4726 memcpy(handle, u->origdata, u->size);
4727 if (!isFineGrainBuffer)
4729 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
4730 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
4731 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4734 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
4735 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
4736 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4743 accessFlags &= ~ACCESS_FAST;
4745 tempUMatFlags = UMatData::TEMP_UMAT;
4750 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
4751 // There are OpenCL runtime issues for less aligned data
4752 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
4753 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
4754 // Avoid sharing of host memory between OpenCL buffers
4755 && !(u->originalUMatData && u->originalUMatData->handle)
4758 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
4759 u->size, u->origdata, &retval);
4760 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
4761 (long long int)u->size, u->origdata, (void*)handle).c_str());
4763 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
4765 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
4766 u->size, u->origdata, &retval);
4767 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
4768 (long long int)u->size, u->origdata, (void*)handle).c_str());
4769 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
4772 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
4773 if(!handle || retval != CL_SUCCESS)
4776 u->prevAllocator = u->currAllocator;
4777 u->currAllocator = this;
4778 u->flags |= tempUMatFlags | flags0;
4779 u->allocatorFlags_ = allocatorFlags;
4781 if(accessFlags & ACCESS_WRITE)
4782 u->markHostCopyObsolete(true);
4783 opencl_allocator_stats.onAllocate(u->size);
4787 /*void sync(UMatData* u) const
4789 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4790 UMatDataAutoLock lock(u);
4792 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
4794 if( u->tempCopiedUMat() )
4796 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4797 u->size, u->origdata, 0, 0, 0);
4802 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4803 (CL_MAP_READ | CL_MAP_WRITE),
4804 0, u->size, 0, 0, 0, &retval);
4805 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4808 u->markHostCopyObsolete(false);
4810 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
4812 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4813 u->size, u->data, 0, 0, 0);
4817 void deallocate(UMatData* u) const CV_OVERRIDE
4822 CV_Assert(u->urefcount == 0);
4823 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
4825 CV_Assert(u->handle != 0);
4826 CV_Assert(u->mapcount == 0);
4828 if (u->flags & UMatData::ASYNC_CLEANUP)
4829 addToCleanupQueue(u);
4834 void deallocate_(UMatData* u) const
4837 CV_Assert(u->handle);
4838 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
4840 opencl_allocator_stats.onFree(u->size);
4844 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
4845 return; // avoid any OpenCL calls
4849 CV_Assert(u->origdata);
4850 // UMatDataAutoLock lock(u);
4852 if (u->hostCopyObsolete())
4854 #ifdef HAVE_OPENCL_SVM
4855 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4857 Context& ctx = Context::getDefault();
4858 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4859 CV_DbgAssert(svmFns->isValid());
4861 if( u->tempCopiedUMat() )
4863 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4864 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
4865 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
4866 cl_command_queue q = NULL;
4867 if (!isFineGrainBuffer)
4869 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
4870 q = (cl_command_queue)Queue::getDefault().ptr();
4871 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
4872 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
4875 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4878 memcpy(u->origdata, u->handle, u->size);
4879 if (!isFineGrainBuffer)
4881 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
4882 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
4883 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4888 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
4895 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4896 if( u->tempCopiedUMat() )
4898 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
4899 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4900 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
4907 CV_Assert(u->mapcount == 0);
4908 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
4909 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4910 (CL_MAP_READ | CL_MAP_WRITE),
4911 0, u->size, 0, 0, 0, &retval);
4912 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
4913 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
4914 if (u->originalUMatData)
4916 CV_Assert(u->originalUMatData->data == data);
4918 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4919 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
4920 CV_OCL_DBG_CHECK(clFinish(q));
4924 u->markHostCopyObsolete(false);
4930 #ifdef HAVE_OPENCL_SVM
4931 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4933 if( u->tempCopiedUMat() )
4935 Context& ctx = Context::getDefault();
4936 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4937 CV_DbgAssert(svmFns->isValid());
4939 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
4940 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
4946 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
4947 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
4950 u->markDeviceCopyObsolete(true);
4951 u->currAllocator = u->prevAllocator;
4952 u->prevAllocator = NULL;
4953 if(u->data && u->copyOnMap() && u->data != u->origdata)
4955 u->data = u->origdata;
4956 u->currAllocator->deallocate(u);
4961 CV_Assert(u->origdata == NULL);
4962 if(u->data && u->copyOnMap() && u->data != u->origdata)
4966 u->markHostCopyObsolete(true);
4968 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
4970 bufferPool.release((cl_mem)u->handle);
4972 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
4974 bufferPoolHostPtr.release((cl_mem)u->handle);
4976 #ifdef HAVE_OPENCL_SVM
4977 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
4979 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
4983 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4984 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
4986 Context& ctx = Context::getDefault();
4987 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4988 CV_DbgAssert(svmFns->isValid());
4989 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4991 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
4993 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
4994 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
4995 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4998 bufferPoolSVM.release((void*)u->handle);
5003 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5006 u->markDeviceCopyObsolete(true);
5010 CV_Assert(u == NULL);
5013 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
5014 void map(UMatData* u, int accessFlags) const CV_OVERRIDE
5016 CV_Assert(u && u->handle);
5018 if(accessFlags & ACCESS_WRITE)
5019 u->markDeviceCopyObsolete(true);
5021 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5024 if( !u->copyOnMap() )
5027 // because there can be other map requests for the same UMat with different access flags,
5028 // we use the universal (read-write) access mode.
5029 #ifdef HAVE_OPENCL_SVM
5030 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5032 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5034 Context& ctx = Context::getDefault();
5035 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5036 CV_DbgAssert(svmFns->isValid());
5038 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5040 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5041 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5044 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5045 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5049 u->data = (uchar*)u->handle;
5050 u->markHostCopyObsolete(false);
5051 u->markDeviceMemMapped(true);
5056 cl_int retval = CL_SUCCESS;
5057 if (!u->deviceMemMapped())
5059 CV_Assert(u->refcount == 1);
5060 CV_Assert(u->mapcount++ == 0);
5061 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5062 (CL_MAP_READ | CL_MAP_WRITE),
5063 0, u->size, 0, 0, 0, &retval);
5064 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5066 if (u->data && retval == CL_SUCCESS)
5068 u->markHostCopyObsolete(false);
5069 u->markDeviceMemMapped(true);
5073 // TODO Is it really a good idea and was it tested well?
5074 // if map failed, switch to copy-on-map mode for the particular buffer
5075 u->flags |= UMatData::COPY_ON_MAP;
5080 u->data = (uchar*)fastMalloc(u->size);
5081 u->markHostCopyObsolete(true);
5085 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
5087 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5088 #ifdef HAVE_OPENCL_SVM
5089 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5091 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5092 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5093 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5094 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5095 u->markHostCopyObsolete(false);
5099 void unmap(UMatData* u) const CV_OVERRIDE
5105 CV_Assert(u->handle != 0);
5107 UMatDataAutoLock autolock(u);
5109 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5111 if( !u->copyOnMap() && u->deviceMemMapped() )
5113 CV_Assert(u->data != NULL);
5114 #ifdef HAVE_OPENCL_SVM
5115 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5117 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5119 Context& ctx = Context::getDefault();
5120 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5121 CV_DbgAssert(svmFns->isValid());
5123 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5125 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5126 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5128 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5130 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5133 if (u->refcount == 0)
5135 u->markDeviceCopyObsolete(false);
5136 u->markHostCopyObsolete(true);
5140 if (u->refcount == 0)
5142 CV_Assert(u->mapcount-- == 1);
5143 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5144 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5145 if (Device::getDefault().isAMD())
5147 // required for multithreaded applications (see stitching test)
5148 CV_OCL_DBG_CHECK(clFinish(q));
5150 u->markDeviceMemMapped(false);
5152 u->markDeviceCopyObsolete(false);
5153 u->markHostCopyObsolete(true);
5156 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5158 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5159 #ifdef HAVE_OPENCL_SVM
5160 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5162 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5163 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5164 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5165 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5166 u->markDeviceCopyObsolete(false);
5167 u->markHostCopyObsolete(true);
5171 bool checkContinuous(int dims, const size_t sz[],
5172 const size_t srcofs[], const size_t srcstep[],
5173 const size_t dstofs[], const size_t dststep[],
5174 size_t& total, size_t new_sz[],
5175 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5176 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5178 bool iscontinuous = true;
5179 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5180 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5182 for( int i = dims-2; i >= 0; i-- )
5184 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5185 iscontinuous = false;
5188 srcrawofs += srcofs[i]*srcstep[i];
5190 dstrawofs += dstofs[i]*dststep[i];
5195 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5198 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5199 // we assume that new_... arrays are initialized by caller
5200 // with 0's, so there is no else branch
5203 new_srcofs[0] = srcofs[1];
5204 new_srcofs[1] = srcofs[0];
5210 new_dstofs[0] = dstofs[1];
5211 new_dstofs[1] = dstofs[0];
5215 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5216 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5220 // we could check for dims == 3 here,
5221 // but from user perspective this one is more informative
5222 CV_Assert(dims <= 3);
5223 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5226 new_srcofs[0] = srcofs[2];
5227 new_srcofs[1] = srcofs[1];
5228 new_srcofs[2] = srcofs[0];
5233 new_dstofs[0] = dstofs[2];
5234 new_dstofs[1] = dstofs[1];
5235 new_dstofs[2] = dstofs[0];
5238 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5239 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5242 return iscontinuous;
5245 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5246 const size_t srcofs[], const size_t srcstep[],
5247 const size_t dststep[]) const CV_OVERRIDE
5251 UMatDataAutoLock autolock(u);
5253 if( u->data && !u->hostCopyObsolete() )
5255 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5258 CV_Assert( u->handle != 0 );
5260 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5262 size_t total = 0, new_sz[] = {0, 0, 0};
5263 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5264 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5266 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5268 srcrawofs, new_srcofs, new_srcstep,
5269 dstrawofs, new_dstofs, new_dststep);
5271 #ifdef HAVE_OPENCL_SVM
5272 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5274 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5275 Context& ctx = Context::getDefault();
5276 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5277 CV_DbgAssert(svmFns->isValid());
5279 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5280 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5282 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5283 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5286 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5291 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5295 // This code is from MatAllocator::download()
5296 int isz[CV_MAX_DIM];
5297 uchar* srcptr = (uchar*)u->handle;
5298 for( int i = 0; i < dims; i++ )
5300 CV_Assert( sz[i] <= (size_t)INT_MAX );
5304 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5305 isz[i] = (int)sz[i];
5308 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5309 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5311 const Mat* arrays[] = { &src, &dst };
5313 NAryMatIterator it(arrays, ptrs, 2);
5314 size_t j, planesz = it.size;
5316 for( j = 0; j < it.nplanes; j++, ++it )
5317 memcpy(ptrs[1], ptrs[0], planesz);
5319 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5321 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5322 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5324 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5333 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5334 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5335 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5337 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5339 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5340 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5341 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5342 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5343 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5344 uchar* ptr = alignedPtr.getAlignedPtr();
5346 CV_Assert(new_srcstep[0] >= new_sz[0]);
5347 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5348 total = std::min(total, u->size - new_srcrawofs);
5349 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5350 new_srcrawofs, total, ptr, 0, 0, 0));
5351 for( size_t i = 0; i < new_sz[1]; i++ )
5352 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5356 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5357 uchar* ptr = alignedPtr.getAlignedPtr();
5359 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5360 new_srcofs, new_dstofs, new_sz,
5368 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5369 const size_t dstofs[], const size_t dststep[],
5370 const size_t srcstep[]) const CV_OVERRIDE
5375 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5376 CV_Assert(u->refcount == 0 || u->tempUMat());
5378 size_t total = 0, new_sz[] = {0, 0, 0};
5379 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5380 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5382 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5384 srcrawofs, new_srcofs, new_srcstep,
5385 dstrawofs, new_dstofs, new_dststep);
5387 UMatDataAutoLock autolock(u);
5389 // if there is cached CPU copy of the GPU matrix,
5390 // we could use it as a destination.
5391 // we can do it in 2 cases:
5392 // 1. we overwrite the whole content
5393 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
5394 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
5396 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
5397 u->markHostCopyObsolete(false);
5398 u->markDeviceCopyObsolete(true);
5402 CV_Assert( u->handle != 0 );
5403 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5405 #ifdef HAVE_OPENCL_SVM
5406 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5408 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5409 Context& ctx = Context::getDefault();
5410 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5411 CV_DbgAssert(svmFns->isValid());
5413 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5414 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5416 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5417 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
5420 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5425 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
5429 // This code is from MatAllocator::upload()
5430 int isz[CV_MAX_DIM];
5431 uchar* dstptr = (uchar*)u->handle;
5432 for( int i = 0; i < dims; i++ )
5434 CV_Assert( sz[i] <= (size_t)INT_MAX );
5438 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5439 isz[i] = (int)sz[i];
5442 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
5443 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5445 const Mat* arrays[] = { &src, &dst };
5447 NAryMatIterator it(arrays, ptrs, 2);
5448 size_t j, planesz = it.size;
5450 for( j = 0; j < it.nplanes; j++, ++it )
5451 memcpy(ptrs[1], ptrs[0], planesz);
5453 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5455 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5456 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5458 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5467 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5468 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5469 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
5470 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
5471 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5473 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5475 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5476 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5477 size_t membuf_ofs = dstrawofs - new_dstrawofs;
5478 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
5479 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5480 uchar* ptr = alignedPtr.getAlignedPtr();
5482 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5483 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
5484 total = std::min(total, u->size - new_dstrawofs);
5485 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
5486 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
5487 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
5488 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5489 new_dstrawofs, total, ptr, 0, 0, 0));
5490 for( size_t i = 0; i < new_sz[1]; i++ )
5491 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
5492 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5493 new_dstrawofs, total, ptr, 0, 0, 0));
5497 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5498 uchar* ptr = alignedPtr.getAlignedPtr();
5500 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5501 new_dstofs, new_srcofs, new_sz,
5507 u->markHostCopyObsolete(true);
5508 #ifdef HAVE_OPENCL_SVM
5509 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5510 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5517 u->markHostCopyObsolete(true);
5519 u->markDeviceCopyObsolete(false);
5522 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
5523 const size_t srcofs[], const size_t srcstep[],
5524 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
5529 size_t total = 0, new_sz[] = {0, 0, 0};
5530 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5531 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5533 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
5535 srcrawofs, new_srcofs, new_srcstep,
5536 dstrawofs, new_dstofs, new_dststep);
5538 UMatDataAutoLock src_autolock(src, dst);
5540 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
5542 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5545 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
5547 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5548 dst->markHostCopyObsolete(false);
5549 #ifdef HAVE_OPENCL_SVM
5550 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5551 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5558 dst->markDeviceCopyObsolete(true);
5563 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5564 CV_Assert(dst->refcount == 0);
5565 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5567 cl_int retval = CL_SUCCESS;
5568 #ifdef HAVE_OPENCL_SVM
5569 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
5570 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5572 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
5573 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5575 Context& ctx = Context::getDefault();
5576 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5577 CV_DbgAssert(svmFns->isValid());
5581 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
5582 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
5583 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
5584 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
5585 total, 0, NULL, NULL);
5586 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
5591 // This code is from MatAllocator::download()/upload()
5592 int isz[CV_MAX_DIM];
5593 uchar* srcptr = (uchar*)src->handle;
5594 for( int i = 0; i < dims; i++ )
5596 CV_Assert( sz[i] <= (size_t)INT_MAX );
5600 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5601 isz[i] = (int)sz[i];
5603 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
5605 uchar* dstptr = (uchar*)dst->handle;
5606 for( int i = 0; i < dims; i++ )
5609 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5611 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
5613 const Mat* arrays[] = { &m_src, &m_dst };
5615 NAryMatIterator it(arrays, ptrs, 2);
5616 size_t j, planesz = it.size;
5618 for( j = 0; j < it.nplanes; j++, ++it )
5619 memcpy(ptrs[1], ptrs[0], planesz);
5624 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5626 map(src, ACCESS_READ);
5627 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5632 map(dst, ACCESS_WRITE);
5633 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5643 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5644 srcrawofs, dstrawofs, total, 0, 0, 0);
5645 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
5646 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
5648 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5650 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5651 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5652 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
5653 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5654 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
5656 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5657 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5658 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
5659 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5660 uchar* srcptr = srcBuf.getAlignedPtr();
5661 uchar* dstptr = dstBuf.getAlignedPtr();
5663 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5665 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
5666 src_total = std::min(src_total, src->size - new_srcrawofs);
5667 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
5668 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
5670 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
5671 new_srcrawofs, src_total, srcptr, 0, 0, 0));
5672 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5673 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5675 for( size_t i = 0; i < new_sz[1]; i++ )
5676 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
5677 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
5678 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5679 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5683 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5684 new_srcofs, new_dstofs, new_sz,
5690 if (retval == CL_SUCCESS)
5692 CV_IMPL_ADD(CV_IMPL_OCL)
5695 #ifdef HAVE_OPENCL_SVM
5696 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5697 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5704 dst->markHostCopyObsolete(true);
5706 dst->markDeviceCopyObsolete(false);
5710 CV_OCL_DBG_CHECK(clFinish(q));
5714 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE {
5715 #ifdef HAVE_OPENCL_SVM
5716 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
5718 return &bufferPoolSVM;
5721 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
5723 return &bufferPoolHostPtr;
5725 if (id != NULL && strcmp(id, "OCL") != 0)
5727 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
5732 MatAllocator* matStdAllocator;
5734 mutable cv::Mutex cleanupQueueMutex;
5735 mutable std::deque<UMatData*> cleanupQueue;
5737 void flushCleanupQueue() const
5739 if (!cleanupQueue.empty())
5741 std::deque<UMatData*> q;
5743 cv::AutoLock lock(cleanupQueueMutex);
5744 q.swap(cleanupQueue);
5746 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
5752 void addToCleanupQueue(UMatData* u) const
5754 //TODO: Validation check: CV_Assert(!u->tempUMat());
5756 cv::AutoLock lock(cleanupQueueMutex);
5757 cleanupQueue.push_back(u);
5762 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
5764 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
5765 g_isOpenCVActivated = true;
5768 MatAllocator* getOpenCLAllocator()
5770 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
5773 }} // namespace cv::ocl
5778 // three funcs below are implemented in umatrix.cpp
5779 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
5780 bool autoSteps = false );
5781 void finalizeHdr(UMat& m);
5786 namespace cv { namespace ocl {
5789 // Convert OpenCL buffer memory to UMat
5791 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
5794 int sizes[] = { rows, cols };
5796 CV_Assert(0 <= d && d <= CV_MAX_DIM);
5800 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
5801 dst.usageFlags = USAGE_DEFAULT;
5803 setSize(dst, d, sizes, 0, true);
5806 cl_mem memobj = (cl_mem)cl_mem_buffer;
5807 cl_mem_object_type mem_type = 0;
5809 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5811 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
5814 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
5816 CV_OCL_CHECK(clRetainMemObject(memobj));
5818 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
5819 CV_Assert(total >= rows * step);
5821 // attach clBuffer to UMatData
5822 dst.u = new UMatData(getOpenCLAllocator());
5824 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
5826 dst.u->handle = cl_mem_buffer;
5827 dst.u->origdata = 0;
5828 dst.u->prevAllocator = 0;
5829 dst.u->size = total;
5835 } // convertFromBuffer()
5839 // Convert OpenCL image2d_t memory to UMat
5841 void convertFromImage(void* cl_mem_image, UMat& dst)
5843 cl_mem clImage = (cl_mem)cl_mem_image;
5844 cl_mem_object_type mem_type = 0;
5846 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5848 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
5850 cl_image_format fmt = { 0, 0 };
5851 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
5854 switch (fmt.image_channel_data_type)
5857 case CL_UNSIGNED_INT8:
5862 case CL_SIGNED_INT8:
5866 case CL_UNORM_INT16:
5867 case CL_UNSIGNED_INT16:
5871 case CL_SNORM_INT16:
5872 case CL_SIGNED_INT16:
5876 case CL_SIGNED_INT32:
5885 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
5889 switch (fmt.image_channel_order)
5892 type = CV_MAKE_TYPE(depth, 1);
5898 type = CV_MAKE_TYPE(depth, 4);
5902 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
5907 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
5910 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
5913 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
5915 dst.create((int)h, (int)w, type);
5917 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
5919 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5922 size_t src_origin[3] = { 0, 0, 0 };
5923 size_t region[3] = { w, h, 1 };
5924 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
5926 CV_OCL_CHECK(clFinish(q));
5929 } // convertFromImage()
5932 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
5934 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
5936 cl_uint numDevices = 0;
5937 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
5938 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
5940 CV_OCL_DBG_CHECK_RESULT(status,
5941 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
5944 if (numDevices == 0)
5950 devices.resize((size_t)numDevices);
5951 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
5954 struct PlatformInfo::Impl
5959 handle = *(cl_platform_id*)id;
5960 getDevices(devices, handle);
5963 String getStrProp(cl_platform_info prop) const
5967 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
5968 sz < sizeof(buf) ? String(buf) : String();
5971 IMPLEMENT_REFCOUNTABLE();
5972 std::vector<cl_device_id> devices;
5973 cl_platform_id handle;
5976 PlatformInfo::PlatformInfo()
5981 PlatformInfo::PlatformInfo(void* platform_id)
5983 p = new Impl(platform_id);
5986 PlatformInfo::~PlatformInfo()
5992 PlatformInfo::PlatformInfo(const PlatformInfo& i)
5999 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6012 int PlatformInfo::deviceNumber() const
6014 return p ? (int)p->devices.size() : 0;
6017 void PlatformInfo::getDevice(Device& device, int d) const
6019 CV_Assert(p && d < (int)p->devices.size() );
6021 device.set(p->devices[d]);
6024 String PlatformInfo::name() const
6026 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6029 String PlatformInfo::vendor() const
6031 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6034 String PlatformInfo::version() const
6036 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
6039 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6041 cl_uint numPlatforms = 0;
6042 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6044 if (numPlatforms == 0)
6050 platforms.resize((size_t)numPlatforms);
6051 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6054 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6056 std::vector<cl_platform_id> platforms;
6057 getPlatforms(platforms);
6059 for (size_t i = 0; i < platforms.size(); i++)
6060 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6063 const char* typeToStr(int type)
6065 static const char* tab[]=
6067 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6068 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6069 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6070 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6071 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6072 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6073 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6074 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6076 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6077 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6080 const char* memopTypeToStr(int type)
6082 static const char* tab[] =
6084 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6085 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6086 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6087 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6088 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6089 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6090 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6091 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6093 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6094 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6097 const char* vecopTypeToStr(int type)
6099 static const char* tab[] =
6101 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6102 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6103 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6104 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6105 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6106 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6107 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6108 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6110 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6111 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6114 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6116 if( sdepth == ddepth )
6118 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6119 if( ddepth >= CV_32F ||
6120 (ddepth == CV_32S && sdepth < CV_32S) ||
6121 (ddepth == CV_16S && sdepth <= CV_8S) ||
6122 (ddepth == CV_16U && sdepth == CV_8U))
6124 sprintf(buf, "convert_%s", typestr);
6126 else if( sdepth >= CV_32F )
6127 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6129 sprintf(buf, "convert_%s_sat", typestr);
6134 const char* getOpenCLErrorString(int errorCode)
6136 #define CV_OCL_CODE(id) case id: return #id
6137 #define CV_OCL_CODE_(id, name) case id: return #name
6140 CV_OCL_CODE(CL_SUCCESS);
6141 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6142 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6143 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6144 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6145 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6146 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6147 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6148 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6149 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6150 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6151 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6152 CV_OCL_CODE(CL_MAP_FAILURE);
6153 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6154 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6155 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6156 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6157 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6158 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6159 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6160 CV_OCL_CODE(CL_INVALID_VALUE);
6161 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6162 CV_OCL_CODE(CL_INVALID_PLATFORM);
6163 CV_OCL_CODE(CL_INVALID_DEVICE);
6164 CV_OCL_CODE(CL_INVALID_CONTEXT);
6165 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6166 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6167 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6168 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6169 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6170 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6171 CV_OCL_CODE(CL_INVALID_SAMPLER);
6172 CV_OCL_CODE(CL_INVALID_BINARY);
6173 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6174 CV_OCL_CODE(CL_INVALID_PROGRAM);
6175 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6176 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6177 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6178 CV_OCL_CODE(CL_INVALID_KERNEL);
6179 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6180 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6181 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6182 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6183 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6184 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6185 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6186 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6187 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6188 CV_OCL_CODE(CL_INVALID_EVENT);
6189 CV_OCL_CODE(CL_INVALID_OPERATION);
6190 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6191 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6192 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6193 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6195 CV_OCL_CODE(CL_INVALID_PROPERTY);
6197 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6198 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6199 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6200 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6202 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6203 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6205 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6206 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6207 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6208 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6209 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6210 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6211 default: return "Unknown OpenCL error";
6217 template <typename T>
6218 static std::string kerToStr(const Mat & k)
6220 int width = k.cols - 1, depth = k.depth();
6221 const T * const data = k.ptr<T>();
6223 std::ostringstream stream;
6224 stream.precision(10);
6228 for (int i = 0; i < width; ++i)
6229 stream << "DIG(" << (int)data[i] << ")";
6230 stream << "DIG(" << (int)data[width] << ")";
6232 else if (depth == CV_32F)
6234 stream.setf(std::ios_base::showpoint);
6235 for (int i = 0; i < width; ++i)
6236 stream << "DIG(" << data[i] << "f)";
6237 stream << "DIG(" << data[width] << "f)";
6241 for (int i = 0; i < width; ++i)
6242 stream << "DIG(" << data[i] << ")";
6243 stream << "DIG(" << data[width] << ")";
6246 return stream.str();
6249 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6251 Mat kernel = _kernel.getMat().reshape(1, 1);
6253 int depth = kernel.depth();
6257 if (ddepth != depth)
6258 kernel.convertTo(kernel, ddepth);
6260 typedef std::string (* func_t)(const Mat &);
6261 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6262 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6263 const func_t func = funcs[ddepth];
6264 CV_Assert(func != 0);
6266 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6269 #define PROCESS_SRC(src) \
6274 CV_Assert(src.isMat() || src.isUMat()); \
6275 Size csize = src.size(); \
6276 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6277 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6278 if (cwidth < ckercn || ckercn <= 0) \
6280 cols.push_back(cwidth); \
6281 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6283 offsets.push_back(src.offset()); \
6284 steps.push_back(src.step()); \
6285 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6286 kercns.push_back(ckercn); \
6291 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6292 InputArray src4, InputArray src5, InputArray src6,
6293 InputArray src7, InputArray src8, InputArray src9,
6294 OclVectorStrategy strat)
6296 const ocl::Device & d = ocl::Device::getDefault();
6298 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6299 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6300 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6301 d.preferredVectorWidthDouble(), -1 };
6303 // if the device says don't use vectors
6304 if (vectorWidths[0] == 1)
6307 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6308 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6309 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6312 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6315 int checkOptimalVectorWidth(const int *vectorWidths,
6316 InputArray src1, InputArray src2, InputArray src3,
6317 InputArray src4, InputArray src5, InputArray src6,
6318 InputArray src7, InputArray src8, InputArray src9,
6319 OclVectorStrategy strat)
6321 CV_Assert(vectorWidths);
6323 int ref_type = src1.type();
6325 std::vector<size_t> offsets, steps, cols;
6326 std::vector<int> dividers, kercns;
6337 size_t size = offsets.size();
6339 for (size_t i = 0; i < size; ++i)
6340 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6341 dividers[i] >>= 1, kercns[i] >>= 1;
6344 int kercn = *std::min_element(kercns.begin(), kercns.end());
6349 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6350 InputArray src4, InputArray src5, InputArray src6,
6351 InputArray src7, InputArray src8, InputArray src9)
6353 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
6359 // TODO Make this as a method of OpenCL "BuildOptions" class
6360 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
6362 if (!buildOptions.empty())
6363 buildOptions += " ";
6364 int type = _m.type(), depth = CV_MAT_DEPTH(type);
6365 buildOptions += format(
6366 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
6367 name.c_str(), ocl::typeToStr(type),
6368 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
6369 name.c_str(), (int)CV_MAT_CN(type),
6370 name.c_str(), (int)CV_ELEM_SIZE(type),
6371 name.c_str(), (int)CV_ELEM_SIZE1(type),
6372 name.c_str(), (int)depth
6377 struct Image2D::Impl
6379 Impl(const UMat &src, bool norm, bool alias)
6383 init(src, norm, alias);
6389 clReleaseMemObject(handle);
6392 static cl_image_format getImageFormat(int depth, int cn, bool norm)
6394 cl_image_format format;
6395 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
6396 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
6397 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
6398 CL_SNORM_INT16, -1, -1, -1, -1 };
6399 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
6401 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
6402 int channelOrder = channelOrders[cn];
6403 format.image_channel_data_type = (cl_channel_type)channelType;
6404 format.image_channel_order = (cl_channel_order)channelOrder;
6408 static bool isFormatSupported(cl_image_format format)
6411 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6413 cl_context context = (cl_context)Context::getDefault().ptr();
6417 // Figure out how many formats are supported by this context.
6418 cl_uint numFormats = 0;
6419 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6420 CL_MEM_OBJECT_IMAGE2D, numFormats,
6422 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
6425 AutoBuffer<cl_image_format> formats(numFormats);
6426 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6427 CL_MEM_OBJECT_IMAGE2D, numFormats,
6428 formats.data(), NULL);
6429 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
6430 for (cl_uint i = 0; i < numFormats; ++i)
6432 if (!memcmp(&formats[i], &format, sizeof(format)))
6441 void init(const UMat &src, bool norm, bool alias)
6444 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6446 CV_Assert(!src.empty());
6447 CV_Assert(ocl::Device::getDefault().imageSupport());
6449 int err, depth = src.depth(), cn = src.channels();
6451 cl_image_format format = getImageFormat(depth, cn, norm);
6453 if (!isFormatSupported(format))
6454 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
6456 if (alias && !src.handle(ACCESS_RW))
6457 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
6459 cl_context context = (cl_context)Context::getDefault().ptr();
6460 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
6462 #ifdef CL_VERSION_1_2
6463 // this enables backwards portability to
6464 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
6465 const Device & d = ocl::Device::getDefault();
6466 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
6467 CV_Assert(!alias || canCreateAlias(src));
6468 if (1 < major || (1 == major && 2 <= minor))
6471 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
6472 desc.image_width = src.cols;
6473 desc.image_height = src.rows;
6474 desc.image_depth = 0;
6475 desc.image_array_size = 1;
6476 desc.image_row_pitch = alias ? src.step[0] : 0;
6477 desc.image_slice_pitch = 0;
6478 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
6479 desc.num_mip_levels = 0;
6480 desc.num_samples = 0;
6481 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
6486 CV_SUPPRESS_DEPRECATED_START
6487 CV_Assert(!alias); // This is an OpenCL 1.2 extension
6488 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
6489 CV_SUPPRESS_DEPRECATED_END
6491 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
6493 size_t origin[] = { 0, 0, 0 };
6494 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
6497 if (!alias && !src.isContinuous())
6499 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
6500 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
6501 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
6504 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
6505 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
6506 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
6507 CV_OCL_DBG_CHECK(clFlush(queue));
6511 devData = (cl_mem)src.handle(ACCESS_READ);
6513 CV_Assert(devData != NULL);
6517 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
6518 if (!src.isContinuous())
6520 CV_OCL_DBG_CHECK(clFlush(queue));
6521 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
6526 IMPLEMENT_REFCOUNTABLE();
6536 Image2D::Image2D(const UMat &src, bool norm, bool alias)
6538 p = new Impl(src, norm, alias);
6541 bool Image2D::canCreateAlias(const UMat &m)
6544 const Device & d = ocl::Device::getDefault();
6545 if (d.imageFromBufferSupport() && !m.empty())
6547 // This is the required pitch alignment in pixels
6548 uint pitchAlign = d.imagePitchAlignment();
6549 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
6551 // We don't currently handle the case where the buffer was created
6552 // with CL_MEM_USE_HOST_PTR
6553 if (!m.u->tempUMat())
6562 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
6564 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
6566 return Impl::isFormatSupported(format);
6569 Image2D::Image2D(const Image2D & i)
6576 Image2D & Image2D::operator = (const Image2D & i)
6595 void* Image2D::ptr() const
6597 return p ? p->handle : 0;
6600 bool internal::isOpenCLForced()
6602 static bool initialized = false;
6603 static bool value = false;
6606 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
6612 bool internal::isPerformanceCheckBypassed()
6614 static bool initialized = false;
6615 static bool value = false;
6618 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
6624 bool internal::isCLBuffer(UMat& u)
6626 void* h = u.handle(ACCESS_RW);
6629 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
6631 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
6634 cl_mem_object_type type = 0;
6635 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
6636 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
6646 Impl(const Queue& q)
6655 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6661 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6665 uint64 durationNS() const
6667 return (uint64)(timer.getTimeSec() * 1e9);
6673 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
6674 Timer::~Timer() { delete p; }
6688 uint64 Timer::durationNS() const
6691 return p->durationNS();
6696 #endif // HAVE_OPENCL