1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
122 #include "umatrix.hpp"
124 namespace cv { namespace ocl {
126 #define IMPLEMENT_REFCOUNTABLE() \
127 void addref() { CV_XADD(&refcount, 1); } \
128 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
136 return opencl_allocator_stats;
140 static bool isRaiseError()
142 static bool initialized = false;
143 static bool value = false;
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
153 #if CV_OPENCL_TRACE_CHECK
155 void traceOpenCLCheck(cl_int status, const char* message)
157 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
164 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
165 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
167 #define CV_OCL_CHECK_RESULT(check_result, msg) \
169 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
170 if (check_result != CL_SUCCESS) \
172 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
173 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
174 CV_Error(Error::OpenCLApiCallError, error_msg); \
178 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
180 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
183 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
184 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
185 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
189 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
190 if (check_result != CL_SUCCESS && isRaiseError()) \
192 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
193 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
194 CV_Error(Error::OpenCLApiCallError, error_msg); \
197 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
198 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
202 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
203 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
204 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
205 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
207 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
208 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
211 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
212 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
220 static const String getBuildExtraOptions()
222 static String param_buildExtraOptions;
223 static bool initialized = false;
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
231 return param_buildExtraOptions;
234 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
235 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
240 UMat2D(const UMat& m)
242 offset = (int)m.offset;
255 UMat3D(const UMat& m)
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
275 static uint64 table[256];
276 static bool initialized = false;
280 for( int i = 0; i < 256; i++ )
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
308 OpenCLBinaryCacheConfigurator()
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
325 if (cache_path_.empty())
327 if (cache_path_ == "disabled")
329 if (!utils::fs::createDirectories(cache_path_))
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
359 catch (const cv::Exception& e)
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
370 if (CV_OPENCL_CACHE_WRITE)
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
381 catch (const cv::Exception& e)
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
387 if (!cache_path_.empty())
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
404 cache_lock_filename_.clear();
405 cache_lock_.release();
408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
411 if (cache_path_.empty())
412 return std::string();
414 AutoLock lock(mutex_prepared_contexts_);
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
438 catch (const cv::Exception& e)
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
463 if (!remove_entries.empty())
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
476 for (size_t i = 0; i < remove_entries.size(); i++)
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
485 catch (const cv::Exception& e)
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
507 class BinaryProgramFile
509 enum { MAX_ENTRIES = 64 };
511 typedef unsigned int uint32_t;
513 struct CV_DECL_ALIGNED(4) FileHeader
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
519 struct CV_DECL_ALIGNED(4) FileTable
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
525 struct CV_DECL_ALIGNED(4) FileEntry
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
540 uint32_t entryOffsets[MAX_ENTRIES];
542 uint32_t getHash(const std::string& options)
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
548 inline size_t getFileSize()
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
556 inline uint32_t readUInt32()
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
563 inline void writeUInt32(const uint32_t value)
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
570 inline void seekReadAbsolute(size_t pos)
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
575 inline void seekReadRelative(size_t pos)
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
581 inline void seekWriteAbsolute(size_t pos)
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
608 bool isValid = false;
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
630 catch (const cv::Exception& e)
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
649 bool read(const std::string& key, std::vector<char>& buf)
654 size_t fileSize = getFileSize();
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
679 uint32_t entryNum = getHash(key);
681 uint32_t entryOffset = entryOffsets[entryNum];
683 while (entryOffset > 0)
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
692 if (entry.keySize > 0)
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
707 if (entry.nextEntryFileOffset == 0)
709 entryOffset = entry.nextEntryFileOffset;
714 bool write(const std::string& key, std::vector<char>& buf)
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
730 size_t fileSize = getFileSize();
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
744 CV_Assert(!f.fail());
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
769 uint32_t entryNum = getHash(key);
771 uint32_t entryOffset = entryOffsets[entryNum];
773 while (entryOffset > 0)
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
782 if (entry.keySize > 0)
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
794 if (entry.nextEntryFileOffset == 0)
796 entryOffset = entry.nextEntryFileOffset;
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
832 // true if we have initialized OpenCL subsystem with available platforms
833 static bool g_isOpenCVActivated = false;
838 static bool g_isOpenCLInitialized = false;
839 static bool g_isOpenCLAvailable = false;
841 if (!g_isOpenCLInitialized)
843 CV_TRACE_REGION("Init_OpenCL_Runtime");
844 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
847 if (cv::String(envPath) == "disabled")
849 g_isOpenCLAvailable = false;
850 g_isOpenCLInitialized = true;
853 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
857 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
858 g_isOpenCVActivated = n > 0;
862 g_isOpenCLAvailable = false;
864 g_isOpenCLInitialized = true;
866 return g_isOpenCLAvailable;
871 CoreTLSData& data = getCoreTlsData();
872 if (data.useOpenCL < 0)
876 data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
883 return data.useOpenCL > 0;
886 bool isOpenCLActivated()
888 if (!g_isOpenCVActivated)
889 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
893 void setUseOpenCL(bool flag)
897 CoreTLSData& data = getCoreTlsData();
902 else if( haveOpenCL() )
904 data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
908 #ifdef HAVE_CLAMDBLAS
913 static AmdBlasHelper & getInstance()
915 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
918 bool isAvailable() const
920 return g_isAmdBlasAvailable;
935 if (!g_isAmdBlasInitialized)
937 AutoLock lock(getInitializationMutex());
939 if (!g_isAmdBlasInitialized)
945 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
949 g_isAmdBlasAvailable = false;
953 g_isAmdBlasAvailable = false;
955 g_isAmdBlasInitialized = true;
961 static bool g_isAmdBlasInitialized;
962 static bool g_isAmdBlasAvailable;
965 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
966 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
970 return AmdBlasHelper::getInstance().isAvailable();
987 static AmdFftHelper & getInstance()
989 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
992 bool isAvailable() const
994 return g_isAmdFftAvailable;
1001 // clAmdFftTeardown();
1009 if (!g_isAmdFftInitialized)
1011 AutoLock lock(getInitializationMutex());
1013 if (!g_isAmdFftInitialized)
1019 cl_uint major, minor, patch;
1020 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1022 // it throws exception in case AmdFft binaries are not found
1023 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1024 g_isAmdFftAvailable = true;
1026 catch (const Exception &)
1028 g_isAmdFftAvailable = false;
1032 g_isAmdFftAvailable = false;
1034 g_isAmdFftInitialized = true;
1040 static clAmdFftSetupData setupData;
1041 static bool g_isAmdFftInitialized;
1042 static bool g_isAmdFftAvailable;
1045 clAmdFftSetupData AmdFftHelper::setupData;
1046 bool AmdFftHelper::g_isAmdFftAvailable = false;
1047 bool AmdFftHelper::g_isAmdFftInitialized = false;
1051 return AmdFftHelper::getInstance().isAvailable();
1065 #ifdef HAVE_OPENCL_SVM
1074 Queue::getDefault().finish();
1077 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1079 struct Platform::Impl
1085 initialized = false;
1094 //cl_uint num_entries
1096 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1102 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1104 vendor = String(buf);
1111 IMPLEMENT_REFCOUNTABLE();
1113 cl_platform_id handle;
1118 Platform::Platform()
1123 Platform::~Platform()
1129 Platform::Platform(const Platform& pl)
1136 Platform& Platform::operator = (const Platform& pl)
1138 Impl* newp = (Impl*)pl.p;
1147 void* Platform::ptr() const
1149 return p ? p->handle : 0;
1152 Platform& Platform::getDefault()
1163 /////////////////////////////////////// Device ////////////////////////////////////////////
1165 // Version has format:
1166 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1168 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1169 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1170 // https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html
1171 // https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html
1172 static void parseOpenCLVersion(const String &version, int &major, int &minor)
1175 if (10 >= version.length())
1177 const char *pstr = version.c_str();
1178 if (0 != strncmp(pstr, "OpenCL ", 7))
1180 size_t ppos = version.find('.', 7);
1181 if (String::npos == ppos)
1183 String temp = version.substr(7, ppos - 7);
1184 major = atoi(temp.c_str());
1185 temp = version.substr(ppos + 1);
1186 minor = atoi(temp.c_str());
1193 handle = (cl_device_id)d;
1196 name_ = getStrProp(CL_DEVICE_NAME);
1197 version_ = getStrProp(CL_DEVICE_VERSION);
1198 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1199 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1200 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1201 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1202 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1203 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1204 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1205 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1207 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1208 parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1211 while (pos < extensions_.size())
1213 size_t pos2 = extensions_.find(' ', pos);
1214 if (pos2 == String::npos)
1215 pos2 = extensions_.size();
1218 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1219 extensions_set_.insert(extensionName);
1224 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1226 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1227 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1228 vendorName_ == "AMD")
1229 vendorID_ = VENDOR_AMD;
1230 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1231 vendorID_ = VENDOR_INTEL;
1232 else if (vendorName_ == "NVIDIA Corporation")
1233 vendorID_ = VENDOR_NVIDIA;
1235 vendorID_ = UNKNOWN_VENDOR;
1237 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1238 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1240 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1241 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1242 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1243 maxWorkGroupSize_ = new_maxWorkGroupSize;
1246 if (isExtensionSupported("cl_khr_spir"))
1248 #ifndef CL_DEVICE_SPIR_VERSIONS
1249 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1251 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1252 std::cout << spir_versions << std::endl;
1257 template<typename _TpCL, typename _TpOut>
1258 _TpOut getProp(cl_device_info prop) const
1263 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1264 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1267 bool getBoolProp(cl_device_info prop) const
1269 cl_bool temp = CL_FALSE;
1272 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1273 sz == sizeof(temp) ? temp != 0 : false;
1276 String getStrProp(cl_device_info prop) const
1280 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1281 sz < sizeof(buf) ? String(buf) : String();
1284 bool isExtensionSupported(const std::string& extensionName) const
1286 return extensions_set_.count(extensionName) > 0;
1290 IMPLEMENT_REFCOUNTABLE();
1292 cl_device_id handle;
1296 std::string extensions_;
1297 int doubleFPConfig_;
1298 bool hostUnifiedMemory_;
1299 int maxComputeUnits_;
1300 size_t maxWorkGroupSize_;
1303 int deviceVersionMajor_;
1304 int deviceVersionMinor_;
1305 String driverVersion_;
1308 bool intelSubgroupsSupport_;
1310 std::set<std::string> extensions_set_;
1319 Device::Device(void* d)
1325 Device::Device(const Device& d)
1332 Device& Device::operator = (const Device& d)
1334 Impl* newp = (Impl*)d.p;
1349 void Device::set(void* d)
1356 void* Device::ptr() const
1358 return p ? p->handle : 0;
1361 String Device::name() const
1362 { return p ? p->name_ : String(); }
1364 String Device::extensions() const
1365 { return p ? String(p->extensions_) : String(); }
1367 bool Device::isExtensionSupported(const String& extensionName) const
1368 { return p ? p->isExtensionSupported(extensionName) : false; }
1370 String Device::version() const
1371 { return p ? p->version_ : String(); }
1373 String Device::vendorName() const
1374 { return p ? p->vendorName_ : String(); }
1376 int Device::vendorID() const
1377 { return p ? p->vendorID_ : 0; }
1379 String Device::OpenCL_C_Version() const
1380 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1382 String Device::OpenCLVersion() const
1383 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1385 int Device::deviceVersionMajor() const
1386 { return p ? p->deviceVersionMajor_ : 0; }
1388 int Device::deviceVersionMinor() const
1389 { return p ? p->deviceVersionMinor_ : 0; }
1391 String Device::driverVersion() const
1392 { return p ? p->driverVersion_ : String(); }
1394 int Device::type() const
1395 { return p ? p->type_ : 0; }
1397 int Device::addressBits() const
1398 { return p ? p->addressBits_ : 0; }
1400 bool Device::available() const
1401 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1403 bool Device::compilerAvailable() const
1404 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1406 bool Device::linkerAvailable() const
1407 #ifdef CL_VERSION_1_2
1408 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1410 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1413 int Device::doubleFPConfig() const
1414 { return p ? p->doubleFPConfig_ : 0; }
1416 int Device::singleFPConfig() const
1417 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1419 int Device::halfFPConfig() const
1420 #ifdef CL_VERSION_1_2
1421 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1423 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1426 bool Device::endianLittle() const
1427 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1429 bool Device::errorCorrectionSupport() const
1430 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1432 int Device::executionCapabilities() const
1433 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1435 size_t Device::globalMemCacheSize() const
1436 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1438 int Device::globalMemCacheType() const
1439 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1441 int Device::globalMemCacheLineSize() const
1442 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1444 size_t Device::globalMemSize() const
1445 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1447 size_t Device::localMemSize() const
1448 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1450 int Device::localMemType() const
1451 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1453 bool Device::hostUnifiedMemory() const
1454 { return p ? p->hostUnifiedMemory_ : false; }
1456 bool Device::imageSupport() const
1457 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1459 bool Device::imageFromBufferSupport() const
1461 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1464 uint Device::imagePitchAlignment() const
1466 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1467 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1473 uint Device::imageBaseAddressAlignment() const
1475 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1476 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1482 size_t Device::image2DMaxWidth() const
1483 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1485 size_t Device::image2DMaxHeight() const
1486 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1488 size_t Device::image3DMaxWidth() const
1489 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1491 size_t Device::image3DMaxHeight() const
1492 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1494 size_t Device::image3DMaxDepth() const
1495 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1497 size_t Device::imageMaxBufferSize() const
1498 #ifdef CL_VERSION_1_2
1499 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1501 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1504 size_t Device::imageMaxArraySize() const
1505 #ifdef CL_VERSION_1_2
1506 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1508 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1511 bool Device::intelSubgroupsSupport() const
1512 { return p ? p->intelSubgroupsSupport_ : false; }
1514 int Device::maxClockFrequency() const
1515 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1517 int Device::maxComputeUnits() const
1518 { return p ? p->maxComputeUnits_ : 0; }
1520 int Device::maxConstantArgs() const
1521 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1523 size_t Device::maxConstantBufferSize() const
1524 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1526 size_t Device::maxMemAllocSize() const
1527 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1529 size_t Device::maxParameterSize() const
1530 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1532 int Device::maxReadImageArgs() const
1533 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1535 int Device::maxWriteImageArgs() const
1536 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1538 int Device::maxSamplers() const
1539 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1541 size_t Device::maxWorkGroupSize() const
1542 { return p ? p->maxWorkGroupSize_ : 0; }
1544 int Device::maxWorkItemDims() const
1545 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1547 void Device::maxWorkItemSizes(size_t* sizes) const
1551 const int MAX_DIMS = 32;
1553 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1554 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1558 int Device::memBaseAddrAlign() const
1559 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1561 int Device::nativeVectorWidthChar() const
1562 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1564 int Device::nativeVectorWidthShort() const
1565 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1567 int Device::nativeVectorWidthInt() const
1568 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1570 int Device::nativeVectorWidthLong() const
1571 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1573 int Device::nativeVectorWidthFloat() const
1574 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1576 int Device::nativeVectorWidthDouble() const
1577 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1579 int Device::nativeVectorWidthHalf() const
1580 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1582 int Device::preferredVectorWidthChar() const
1583 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1585 int Device::preferredVectorWidthShort() const
1586 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1588 int Device::preferredVectorWidthInt() const
1589 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1591 int Device::preferredVectorWidthLong() const
1592 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1594 int Device::preferredVectorWidthFloat() const
1595 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1597 int Device::preferredVectorWidthDouble() const
1598 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1600 int Device::preferredVectorWidthHalf() const
1601 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1603 size_t Device::printfBufferSize() const
1604 #ifdef CL_VERSION_1_2
1605 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1607 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1611 size_t Device::profilingTimerResolution() const
1612 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1614 const Device& Device::getDefault()
1616 const Context& ctx = Context::getDefault();
1617 int idx = getCoreTlsData().device;
1618 const Device& device = ctx.device(idx);
1622 ////////////////////////////////////// Context ///////////////////////////////////////////////////
1624 template <typename Functor, typename ObjectType>
1625 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
1628 cl_int err = f(obj, name, 0, NULL, &required);
1629 if (err != CL_SUCCESS)
1635 AutoBuffer<char> buf(required + 1);
1636 char* ptr = buf.data(); // cleanup is not needed
1637 err = f(obj, name, required, ptr, NULL);
1638 if (err != CL_SUCCESS)
1646 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
1651 std::istringstream ss(s);
1655 std::getline(ss, item, delim);
1656 elems.push_back(item);
1660 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
1662 // Sample: AMD:GPU:Tahiti
1663 // Sample: :GPU|CPU: = '' = ':' = '::'
1664 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
1665 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
1667 std::vector<std::string> parts;
1668 split(configurationStr, ':', parts);
1669 if (parts.size() > 3)
1671 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
1674 if (parts.size() > 2)
1675 deviceNameOrID = parts[2];
1676 if (parts.size() > 1)
1678 split(parts[1], '|', deviceTypes);
1680 if (parts.size() > 0)
1682 platform = parts[0];
1687 #if defined WINRT || defined _WIN32_WCE
1688 static cl_device_id selectOpenCLDevice()
1693 // std::tolower is int->int
1694 static char char_tolower(char ch)
1696 return (char)std::tolower((int)ch);
1698 static cl_device_id selectOpenCLDevice()
1700 std::string platform, deviceName;
1701 std::vector<std::string> deviceTypes;
1703 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
1704 if (configuration &&
1705 (strcmp(configuration, "disabled") == 0 ||
1706 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
1712 if (deviceName.length() == 1)
1713 // We limit ID range to 0..9, because we want to write:
1714 // - '2500' to mean i5-2500
1715 // - '8350' to mean AMD FX-8350
1716 // - '650' to mean GeForce 650
1717 // To extend ID range change condition to '> 0'
1720 for (size_t i = 0; i < deviceName.length(); i++)
1722 if (!isdigit(deviceName[i]))
1730 deviceID = atoi(deviceName.c_str());
1736 std::vector<cl_platform_id> platforms;
1738 cl_uint numPlatforms = 0;
1739 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
1741 if (numPlatforms == 0)
1743 platforms.resize((size_t)numPlatforms);
1744 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
1745 platforms.resize(numPlatforms);
1748 int selectedPlatform = -1;
1749 if (platform.length() > 0)
1751 for (size_t i = 0; i < platforms.size(); i++)
1754 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
1755 if (name.find(platform) != std::string::npos)
1757 selectedPlatform = (int)i;
1761 if (selectedPlatform == -1)
1763 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
1767 if (deviceTypes.size() == 0)
1771 deviceTypes.push_back("GPU");
1773 deviceTypes.push_back("CPU");
1776 deviceTypes.push_back("ALL");
1778 for (size_t t = 0; t < deviceTypes.size(); t++)
1781 std::string tempStrDeviceType = deviceTypes[t];
1782 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), char_tolower);
1784 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1785 deviceType = Device::TYPE_GPU;
1786 else if (tempStrDeviceType == "cpu")
1787 deviceType = Device::TYPE_CPU;
1788 else if (tempStrDeviceType == "accelerator")
1789 deviceType = Device::TYPE_ACCELERATOR;
1790 else if (tempStrDeviceType == "all")
1791 deviceType = Device::TYPE_ALL;
1794 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
1798 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
1799 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
1800 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
1804 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
1805 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1807 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
1811 size_t base = devices.size();
1812 devices.resize(base + count);
1813 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
1814 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1816 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
1820 for (size_t i = (isID ? deviceID : 0);
1821 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
1825 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
1826 cl_bool useGPU = true;
1827 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1829 cl_bool isIGPU = CL_FALSE;
1830 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
1831 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
1833 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
1835 // TODO check for OpenCL 1.1
1843 return NULL; // suppress messages on stderr
1845 std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << configuration << std::endl
1846 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
1847 << " Device types: ";
1848 for (size_t t = 0; t < deviceTypes.size(); t++)
1849 std::cerr << deviceTypes[t] << " ";
1851 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
1856 #ifdef HAVE_OPENCL_SVM
1859 enum AllocatorFlags { // don't use first 16 bits
1860 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
1861 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
1862 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
1863 OPENCL_SVM_BUFFER_MASK = 3 << 16,
1864 OPENCL_SVM_BUFFER_MAP = 4 << 16
1867 static bool checkForceSVMUmatUsage()
1869 static bool initialized = false;
1870 static bool force = false;
1873 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
1878 static bool checkDisableSVMUMatUsage()
1880 static bool initialized = false;
1881 static bool force = false;
1884 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
1889 static bool checkDisableSVM()
1891 static bool initialized = false;
1892 static bool force = false;
1895 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
1900 // see SVMCapabilities
1901 static unsigned int getSVMCapabilitiesMask()
1903 static bool initialized = false;
1904 static unsigned int mask = 0;
1907 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
1908 if (envValue == NULL)
1910 return ~0U; // all bits 1
1912 mask = atoi(envValue);
1920 static size_t getProgramCountLimit()
1922 static bool initialized = false;
1923 static size_t count = 0;
1926 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
1932 struct Context::Impl
1934 static Context::Impl* get(Context& context) { return context.p; }
1940 #ifdef HAVE_OPENCL_SVM
1941 svmInitialized = false;
1952 CV_Assert(handle == NULL);
1954 cl_device_id d = selectOpenCLDevice();
1959 cl_platform_id pl = NULL;
1960 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
1962 cl_context_properties prop[] =
1964 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1968 // !!! in the current implementation force the number of devices to 1 !!!
1972 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
1973 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
1975 bool ok = handle != 0 && status == CL_SUCCESS;
1990 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
1991 cl_context_properties prop[] =
1993 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1998 int dtype = dtype0 & 15;
1999 cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0);
2000 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
2002 CV_OCL_DBG_CHECK_RESULT(status,
2003 cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str());
2009 AutoBuffer<void*> dlistbuf(nd0*2+1);
2010 cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
2011 cl_device_id* dlist_new = dlist + nd0;
2012 CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
2016 for(i = 0; i < nd0; i++)
2019 if( !d.available() || !d.compilerAvailable() )
2021 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2023 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2025 String name = d.name();
2026 if( nd != 0 && name != name0 )
2029 dlist_new[nd++] = dlist[i];
2035 // !!! in the current implementation force the number of devices to 1 !!!
2038 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2039 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateContext");
2040 bool ok = handle != 0 && retval == CL_SUCCESS;
2044 for( i = 0; i < nd; i++ )
2045 devices[i].set(dlist_new[i]);
2053 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2059 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2061 void unloadProg(Program& prog)
2063 cv::AutoLock lock(program_cache_mutex);
2064 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2066 phash_t::iterator it = phash.find(*i);
2067 if (it != phash.end())
2069 if (it->second.ptr() == prog.ptr())
2079 std::string& getPrefixString()
2083 cv::AutoLock lock(program_cache_mutex);
2086 CV_Assert(!devices.empty());
2087 const Device& d = devices[0];
2088 int bits = d.addressBits();
2089 if (bits > 0 && bits != 64)
2090 prefix = cv::format("%d-bit--", bits);
2091 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2093 for (size_t i = 0; i < prefix.size(); i++)
2096 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2106 std::string& getPrefixBase()
2108 if (prefix_base.empty())
2110 cv::AutoLock lock(program_cache_mutex);
2111 if (prefix_base.empty())
2113 const Device& d = devices[0];
2114 int bits = d.addressBits();
2115 if (bits > 0 && bits != 64)
2116 prefix_base = cv::format("%d-bit--", bits);
2117 prefix_base += d.vendorName() + "--" + d.name() + "--";
2119 for (size_t i = 0; i < prefix_base.size(); i++)
2121 char c = prefix_base[i];
2122 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2124 prefix_base[i] = '_';
2132 IMPLEMENT_REFCOUNTABLE();
2135 std::vector<Device> devices;
2138 std::string prefix_base;
2140 cv::Mutex program_cache_mutex;
2141 typedef std::map<std::string, Program> phash_t;
2143 typedef std::list<cv::String> CacheList;
2144 CacheList cacheList;
2146 #ifdef HAVE_OPENCL_SVM
2147 bool svmInitialized;
2150 svm::SVMCapabilities svmCapabilities;
2151 svm::SVMFunctions svmFunctions;
2155 CV_Assert(handle != NULL);
2156 const Device& device = devices[0];
2157 cl_device_svm_capabilities deviceCaps = 0;
2158 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2159 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2160 if (status != CL_SUCCESS)
2162 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2165 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2166 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2167 svmCapabilities.value_ =
2168 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2169 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2170 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2171 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2172 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2173 if (svmCapabilities.value_ == 0)
2175 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2181 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2182 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2185 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2186 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2191 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2192 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2194 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2195 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2200 ((int*)ptr)[0] = 100;
2204 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2207 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2209 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2210 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2215 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2220 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2221 clSVMFree(handle, ptr);
2224 clSVMFree(handle, ptr);
2225 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2226 svmFunctions.fn_clSVMFree = clSVMFree;
2227 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2228 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2229 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2230 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2231 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2232 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2233 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2237 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2240 // Try HSA extension
2241 String extensions = device.extensions();
2242 if (extensions.find("cl_amd_svm") == String::npos)
2244 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2247 cl_platform_id p = NULL;
2248 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2249 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2250 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2251 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2252 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2253 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2254 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2255 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2256 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2257 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2258 CV_Assert(svmFunctions.isValid());
2262 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2267 svmAvailable = true;
2268 svmEnabled = !svm::checkDisableSVM();
2269 svmInitialized = true;
2270 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2273 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2274 svmAvailable = false;
2276 svmCapabilities.value_ = 0;
2277 svmInitialized = true;
2278 svmFunctions.fn_clSVMAlloc = NULL;
2283 friend class Program;
2292 Context::Context(int dtype)
2298 bool Context::create()
2313 bool Context::create(int dtype0)
2319 p = new Impl(dtype0);
2337 Context::Context(const Context& c)
2344 Context& Context::operator = (const Context& c)
2346 Impl* newp = (Impl*)c.p;
2355 void* Context::ptr() const
2357 return p == NULL ? NULL : p->handle;
2360 size_t Context::ndevices() const
2362 return p ? p->devices.size() : 0;
2365 const Device& Context::device(size_t idx) const
2367 static Device dummy;
2368 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2371 Context& Context::getDefault(bool initialize)
2373 static Context* ctx = new Context();
2374 if(!ctx->p && haveOpenCL())
2377 ctx->p = new Impl();
2380 // do not create new Context right away.
2381 // First, try to retrieve existing context of the same type.
2382 // In its turn, Platform::getContext() may call Context::create()
2383 // if there is no such context.
2384 if (ctx->p->handle == NULL)
2385 ctx->p->setDefault();
2392 Program Context::getProg(const ProgramSource& prog,
2393 const String& buildopts, String& errmsg)
2395 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2398 void Context::unloadProg(Program& prog)
2401 p->unloadProg(prog);
2404 #ifdef HAVE_OPENCL_SVM
2405 bool Context::useSVM() const
2407 Context::Impl* i = p;
2409 if (!i->svmInitialized)
2411 return i->svmEnabled;
2413 void Context::setUseSVM(bool enabled)
2415 Context::Impl* i = p;
2417 if (!i->svmInitialized)
2419 if (enabled && !i->svmAvailable)
2421 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
2423 i->svmEnabled = enabled;
2426 bool Context::useSVM() const { return false; }
2427 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
2430 #ifdef HAVE_OPENCL_SVM
2433 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
2435 Context::Impl* i = context.p;
2437 if (!i->svmInitialized)
2439 return i->svmCapabilities;
2442 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
2444 Context::Impl* i = context.p;
2446 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
2447 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
2448 return &i->svmFunctions;
2451 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
2453 if (checkForceSVMUmatUsage())
2455 if (checkDisableSVMUMatUsage())
2457 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
2459 return false; // don't use SVM by default
2462 } // namespace cv::ocl::svm
2463 #endif // HAVE_OPENCL_SVM
2466 static void get_platform_name(cl_platform_id id, String& name)
2468 // get platform name string length
2470 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
2472 // get platform name string
2473 AutoBuffer<char> buf(sz + 1);
2474 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
2476 // just in case, ensure trailing zero for ASCIIZ string
2483 // Attaches OpenCL context to OpenCV
2485 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
2489 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
2492 CV_Error(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
2494 std::vector<cl_platform_id> platforms(cnt);
2496 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
2498 bool platformAvailable = false;
2500 // check if external platformName contained in list of available platforms in OpenCV
2501 for (unsigned int i = 0; i < cnt; i++)
2503 String availablePlatformName;
2504 get_platform_name(platforms[i], availablePlatformName);
2505 // external platform is found in the list of available platforms
2506 if (platformName == availablePlatformName)
2508 platformAvailable = true;
2513 if (!platformAvailable)
2514 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2516 // check if platformID corresponds to platformName
2517 String actualPlatformName;
2518 get_platform_name((cl_platform_id)platformID, actualPlatformName);
2519 if (platformName != actualPlatformName)
2520 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2522 // do not initialize OpenCL context
2523 Context ctx = Context::getDefault(false);
2525 // attach supplied context to OpenCV
2526 initializeContextFromHandle(ctx, platformID, context, deviceID);
2528 CV_OCL_CHECK(clRetainContext((cl_context)context));
2530 // clear command queue, if any
2531 CoreTLSData& data = getCoreTlsData();
2532 data.oclQueue.finish();
2537 } // attachContext()
2540 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2542 cl_context context = (cl_context)_context;
2543 cl_device_id device = (cl_device_id)_device;
2545 // cleanup old context
2546 Context::Impl * impl = ctx.p;
2549 CV_OCL_DBG_CHECK(clReleaseContext(impl->handle));
2551 impl->devices.clear();
2553 impl->handle = context;
2554 impl->devices.resize(1);
2555 impl->devices[0].set(device);
2557 Platform& p = Platform::getDefault();
2558 Platform::Impl* pImpl = p.p;
2559 pImpl->handle = (cl_platform_id)platform;
2562 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2566 inline void __init()
2570 isProfilingQueue_ = false;
2573 Impl(cl_command_queue q)
2578 cl_command_queue_properties props = 0;
2579 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
2580 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
2583 Impl(cl_command_queue q, bool isProfilingQueue)
2587 isProfilingQueue_ = isProfilingQueue;
2590 Impl(const Context& c, const Device& d, bool withProfiling = false)
2594 const Context* pc = &c;
2595 cl_context ch = (cl_context)pc->ptr();
2598 pc = &Context::getDefault();
2599 ch = (cl_context)pc->ptr();
2601 cl_device_id dh = (cl_device_id)d.ptr();
2603 dh = (cl_device_id)pc->device(0).ptr();
2605 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
2606 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
2607 isProfilingQueue_ = withProfiling;
2613 if (!cv::__termination)
2618 CV_OCL_DBG_CHECK(clFinish(handle));
2619 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
2625 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
2627 if (isProfilingQueue_)
2630 if (profiling_queue_.ptr())
2631 return profiling_queue_;
2634 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
2636 cl_device_id device = 0;
2637 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
2639 cl_int result = CL_SUCCESS;
2640 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
2641 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
2642 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
2645 queue.p = new Impl(q, true);
2646 profiling_queue_ = queue;
2648 return profiling_queue_;
2651 IMPLEMENT_REFCOUNTABLE();
2653 cl_command_queue handle;
2654 bool isProfilingQueue_;
2655 cv::ocl::Queue profiling_queue_;
2663 Queue::Queue(const Context& c, const Device& d)
2669 Queue::Queue(const Queue& q)
2676 Queue& Queue::operator = (const Queue& q)
2678 Impl* newp = (Impl*)q.p;
2693 bool Queue::create(const Context& c, const Device& d)
2698 return p->handle != 0;
2701 void Queue::finish()
2705 CV_OCL_DBG_CHECK(clFinish(p->handle));
2709 const Queue& Queue::getProfilingQueue() const
2712 return p->getProfilingQueue(*this);
2715 void* Queue::ptr() const
2717 return p ? p->handle : 0;
2720 Queue& Queue::getDefault()
2722 Queue& q = getCoreTlsData().oclQueue;
2723 if( !q.p && haveOpenCL() )
2724 q.create(Context::getDefault());
2728 static cl_command_queue getQueue(const Queue& q)
2730 cl_command_queue qq = (cl_command_queue)q.ptr();
2732 qq = (cl_command_queue)Queue::getDefault().ptr();
2736 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2738 KernelArg::KernelArg()
2739 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2743 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2744 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2746 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
2749 KernelArg KernelArg::Constant(const Mat& m)
2751 CV_Assert(m.isContinuous());
2752 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
2755 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2759 Impl(const char* kname, const Program& prog) :
2760 refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
2762 cl_program ph = (cl_program)prog.ptr();
2767 handle = clCreateKernel(ph, kname, &retval);
2768 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
2770 for( int i = 0; i < MAX_ARRS; i++ )
2772 haveTempDstUMats = false;
2773 haveTempSrcUMats = false;
2778 for( int i = 0; i < MAX_ARRS; i++ )
2781 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2783 u[i]->flags |= UMatData::ASYNC_CLEANUP;
2784 u[i]->currAllocator->deallocate(u[i]);
2789 haveTempDstUMats = false;
2790 haveTempSrcUMats = false;
2793 void addUMat(const UMat& m, bool dst)
2795 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2797 CV_XADD(&m.u->urefcount, 1);
2799 if(dst && m.u->tempUMat())
2800 haveTempDstUMats = true;
2801 if(m.u->originalUMatData == NULL && m.u->tempUMat())
2802 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
2805 void addImage(const Image2D& image)
2807 images.push_back(image);
2810 void finit(cl_event e)
2815 isInProgress = false;
2819 bool run(int dims, size_t _globalsize[], size_t _localsize[],
2820 bool sync, int64* timeNS, const Queue& q);
2826 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
2830 IMPLEMENT_REFCOUNTABLE();
2834 enum { MAX_ARRS = 16 };
2835 UMatData* u[MAX_ARRS];
2837 bool isAsyncRun; // true if kernel was scheduled in async mode
2839 std::list<Image2D> images;
2840 bool haveTempDstUMats;
2841 bool haveTempSrcUMats;
2844 }} // namespace cv::ocl
2848 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2852 ((cv::ocl::Kernel::Impl*)p)->finit(e);
2854 catch (const cv::Exception& exc)
2856 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
2858 catch (const std::exception& exc)
2860 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
2864 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
2870 namespace cv { namespace ocl {
2877 Kernel::Kernel(const char* kname, const Program& prog)
2880 create(kname, prog);
2883 Kernel::Kernel(const char* kname, const ProgramSource& src,
2884 const String& buildopts, String* errmsg)
2887 create(kname, src, buildopts, errmsg);
2890 Kernel::Kernel(const Kernel& k)
2897 Kernel& Kernel::operator = (const Kernel& k)
2899 Impl* newp = (Impl*)k.p;
2914 bool Kernel::create(const char* kname, const Program& prog)
2918 p = new Impl(kname, prog);
2924 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
2930 bool Kernel::create(const char* kname, const ProgramSource& src,
2931 const String& buildopts, String* errmsg)
2939 if( !errmsg ) errmsg = &tempmsg;
2940 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2941 return create(kname, prog);
2944 void* Kernel::ptr() const
2946 return p ? p->handle : 0;
2949 bool Kernel::empty() const
2954 static cv::String dumpValue(size_t sz, const void* p)
2957 return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p);
2959 return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p);
2960 return cv::format("%p", p);
2963 int Kernel::set(int i, const void* value, size_t sz)
2965 if (!p || !p->handle)
2972 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2973 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str());
2974 if (retval != CL_SUCCESS)
2979 int Kernel::set(int i, const Image2D& image2D)
2981 p->addImage(image2D);
2982 cl_mem h = (cl_mem)image2D.ptr();
2983 return set(i, &h, sizeof(h));
2986 int Kernel::set(int i, const UMat& m)
2988 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
2991 int Kernel::set(int i, const KernelArg& arg)
2993 if( !p || !p->handle )
2997 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
2998 p->name.c_str(), (int)i));
3006 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
3007 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
3008 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
3009 if (ptronly && arg.m->empty())
3011 cl_mem h_null = (cl_mem)NULL;
3012 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3013 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3016 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3020 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3021 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3027 #ifdef HAVE_OPENCL_SVM
3028 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3030 const Context& ctx = Context::getDefault();
3031 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3032 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3033 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3035 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3037 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3039 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3044 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3045 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3052 else if( arg.m->dims <= 2 )
3055 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3056 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3057 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3058 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3061 if( !(arg.flags & KernelArg::NO_SIZE) )
3063 int cols = u2d.cols*arg.wscale/arg.iwscale;
3064 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3065 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3066 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3067 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3074 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3075 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3076 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3077 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3078 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3079 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3081 if( !(arg.flags & KernelArg::NO_SIZE) )
3083 int cols = u3d.cols*arg.wscale/arg.iwscale;
3084 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3085 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3086 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3087 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3088 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3089 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3093 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
3096 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3097 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3101 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3102 bool sync, const Queue& q)
3107 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3109 CV_Assert(_globalsize != NULL);
3110 for (int i = 0; i < dims; i++)
3112 size_t val = _localsize ? _localsize[i] :
3113 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3114 CV_Assert( val > 0 );
3115 total *= _globalsize[i];
3116 if (_globalsize[i] == 1 && !_localsize)
3118 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3120 CV_Assert(total > 0);
3122 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3126 static bool isRaiseErrorOnReuseAsyncKernel()
3128 static bool initialized = false;
3129 static bool value = false;
3132 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3138 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3139 bool sync, int64* timeNS, const Queue& q)
3141 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3145 CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
3151 CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3152 if (isRaiseErrorOnReuseAsyncKernel())
3154 return false; // OpenCV 5.0: raise error
3160 CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3161 if (isRaiseErrorOnReuseAsyncKernel())
3163 return false; // OpenCV 5.0: raise error
3166 cl_command_queue qq = getQueue(q);
3167 if (haveTempDstUMats)
3169 if (haveTempSrcUMats)
3173 cl_event asyncEvent = 0;
3174 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3175 NULL, globalsize, localsize, 0, 0,
3176 (sync && !timeNS) ? 0 : &asyncEvent);
3177 #if !CV_OPENCL_SHOW_RUN_KERNELS
3178 if (retval != CL_SUCCESS)
3181 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%dx%dx%d, localsize=%s) sync=%s", name.c_str(), (int)dims,
3182 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3183 (localsize ? cv::format("%dx%dx%d", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3184 sync ? "true" : "false"
3186 if (retval != CL_SUCCESS)
3188 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3190 #if CV_OPENCL_TRACE_CHECK
3191 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3193 printf("%s\n", msg.c_str());
3197 if (sync || retval != CL_SUCCESS)
3199 CV_OCL_DBG_CHECK(clFinish(qq));
3202 if (retval == CL_SUCCESS)
3204 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3205 cl_ulong startTime, stopTime;
3206 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3207 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3208 *timeNS = (int64)(stopTime - startTime);
3220 isInProgress = true;
3221 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3224 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3225 return retval == CL_SUCCESS;
3228 bool Kernel::runTask(bool sync, const Queue& q)
3230 if(!p || !p->handle || p->isInProgress)
3233 cl_command_queue qq = getQueue(q);
3234 cl_event asyncEvent = 0;
3235 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3236 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3237 if (sync || retval != CL_SUCCESS)
3239 CV_OCL_DBG_CHECK(clFinish(qq));
3245 p->isInProgress = true;
3246 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3249 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3250 return retval == CL_SUCCESS;
3253 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3255 CV_Assert(p && p->handle && !p->isInProgress);
3256 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3258 q.finish(); // call clFinish() on base queue
3259 Queue profilingQueue = q.getProfilingQueue();
3261 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3262 return res ? timeNs : -1;
3265 size_t Kernel::workGroupSize() const
3267 if(!p || !p->handle)
3269 size_t val = 0, retsz = 0;
3270 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3271 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3272 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3273 return status == CL_SUCCESS ? val : 0;
3276 size_t Kernel::preferedWorkGroupSizeMultiple() const
3278 if(!p || !p->handle)
3280 size_t val = 0, retsz = 0;
3281 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3282 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3283 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3284 return status == CL_SUCCESS ? val : 0;
3287 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3289 if(!p || !p->handle || !wsz)
3292 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3293 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3294 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3295 return status == CL_SUCCESS;
3298 size_t Kernel::localMemSize() const
3300 if(!p || !p->handle)
3304 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3305 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3306 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3307 return status == CL_SUCCESS ? (size_t)val : 0;
3312 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3314 struct ProgramSource::Impl
3316 IMPLEMENT_REFCOUNTABLE();
3319 PROGRAM_SOURCE_CODE = 0,
3325 Impl(const String& src)
3327 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3328 initFromSource(src, cv::String());
3330 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3332 init(PROGRAM_SOURCE_CODE, module, name);
3333 initFromSource(codeStr, codeHash);
3337 void init(enum KIND kind, const String& module, const String& name)
3346 isHashUpdated = false;
3349 void initFromSource(const String& codeStr, const String& codeHash)
3352 sourceHash_ = codeHash;
3353 if (sourceHash_.empty())
3359 isHashUpdated = true;
3363 void updateHash(const char* hashStr = NULL)
3367 sourceHash_ = cv::String(hashStr);
3368 isHashUpdated = true;
3374 case PROGRAM_SOURCE_CODE:
3377 CV_Assert(codeStr_.empty());
3378 hash = crc64(sourceAddr_, sourceSize_); // static storage
3382 CV_Assert(!codeStr_.empty());
3383 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3386 case PROGRAM_BINARIES:
3389 hash = crc64(sourceAddr_, sourceSize_);
3392 CV_Error(Error::StsInternal, "Internal error");
3394 sourceHash_ = cv::format("%08llx", hash);
3395 isHashUpdated = true;
3398 Impl(enum KIND kind,
3399 const String& module, const String& name,
3400 const unsigned char* binary, const size_t size,
3401 const cv::String& buildOptions = cv::String())
3403 init(kind, module, name);
3405 sourceAddr_ = binary;
3408 buildOptions_ = buildOptions;
3411 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3412 const char* sourceCodeStaticStr, const char* hashStaticStr,
3413 const cv::String& buildOptions)
3415 ProgramSource result;
3416 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3417 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3418 result.p->updateHash(hashStaticStr);
3422 static ProgramSource fromBinary(const String& module, const String& name,
3423 const unsigned char* binary, const size_t size,
3424 const cv::String& buildOptions)
3426 ProgramSource result;
3427 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
3431 static ProgramSource fromSPIR(const String& module, const String& name,
3432 const unsigned char* binary, const size_t size,
3433 const cv::String& buildOptions)
3435 ProgramSource result;
3436 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
3443 // TODO std::vector<ProgramSource> includes_;
3444 String codeStr_; // PROGRAM_SOURCE_CODE only
3446 const unsigned char* sourceAddr_;
3449 cv::String buildOptions_;
3454 friend struct Program::Impl;
3455 friend struct internal::ProgramEntry;
3456 friend struct Context::Impl;
3460 ProgramSource::ProgramSource()
3465 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
3467 p = new Impl(module, name, codeStr, codeHash);
3470 ProgramSource::ProgramSource(const char* prog)
3475 ProgramSource::ProgramSource(const String& prog)
3480 ProgramSource::~ProgramSource()
3486 ProgramSource::ProgramSource(const ProgramSource& prog)
3493 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3495 Impl* newp = (Impl*)prog.p;
3504 const String& ProgramSource::source() const
3507 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
3508 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
3512 ProgramSource::hash_t ProgramSource::hash() const
3514 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
3517 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
3518 const unsigned char* binary, const size_t size,
3519 const cv::String& buildOptions)
3522 CV_Assert(size > 0);
3523 return Impl::fromBinary(module, name, binary, size, buildOptions);
3526 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
3527 const unsigned char* binary, const size_t size,
3528 const cv::String& buildOptions)
3531 CV_Assert(size > 0);
3532 return Impl::fromBinary(module, name, binary, size, buildOptions);
3536 internal::ProgramEntry::operator ProgramSource&() const
3538 if (this->pProgramSource == NULL)
3540 cv::AutoLock lock(cv::getInitializationMutex());
3541 if (this->pProgramSource == NULL)
3543 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
3544 ProgramSource* ptr = new ProgramSource(ps);
3545 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
3548 return *this->pProgramSource;
3553 /////////////////////////////////////////// Program /////////////////////////////////////////////
3556 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
3564 return a + (cv::String(" ") + b);
3567 struct Program::Impl
3569 IMPLEMENT_REFCOUNTABLE();
3571 Impl(const ProgramSource& src,
3572 const String& _buildflags, String& errmsg) :
3575 buildflags(_buildflags)
3577 const ProgramSource::Impl* src_ = src.getImpl();
3579 sourceModule_ = src_->module_;
3580 sourceName_ = src_->name_;
3581 const Context ctx = Context::getDefault();
3582 Device device = ctx.device(0);
3583 if (ctx.ptr() == NULL || device.ptr() == NULL)
3585 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
3586 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3589 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
3590 else if (device.isIntel())
3591 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
3592 const String param_buildExtraOptions = getBuildExtraOptions();
3593 if (!param_buildExtraOptions.empty())
3594 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
3596 compile(ctx, src_, errmsg);
3599 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3601 CV_Assert(ctx.getImpl());
3604 // We don't cache OpenCL binaries
3605 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
3607 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3608 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3611 return compileWithCache(ctx, src_, errmsg);
3614 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3616 CV_Assert(ctx.getImpl());
3618 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
3620 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3621 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
3622 const std::string base_dir = config.prepareCacheDirectoryForContext(
3623 ctx.getImpl()->getPrefixString(),
3624 ctx.getImpl()->getPrefixBase()
3626 const String& hash_str = src_->sourceHash_;
3628 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
3630 CV_Assert(!hash_str.empty());
3631 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
3632 fname = utils::fs::join(base_dir, fname);
3634 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
3635 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
3639 std::vector<char> binaryBuf;
3642 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3643 BinaryProgramFile file(fname, hash_str.c_str());
3644 res = file.read(buildflags, binaryBuf);
3648 CV_Assert(!binaryBuf.empty());
3649 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
3650 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
3655 catch (const cv::Exception& e)
3658 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
3662 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
3665 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3666 CV_Assert(handle == NULL);
3667 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3669 if (!buildFromSources(ctx, src_, errmsg))
3674 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
3676 buildflags = joinBuildOptions(buildflags, " -x spir");
3677 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
3679 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
3681 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3682 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3686 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
3688 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
3692 CV_Error(Error::StsInternal, "Internal error");
3694 CV_Assert(handle != NULL);
3695 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3696 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
3700 std::vector<char> binaryBuf;
3701 getProgramBinary(binaryBuf);
3703 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3704 BinaryProgramFile file(fname, hash_str.c_str());
3705 file.write(buildflags, binaryBuf);
3708 catch (const cv::Exception& e)
3710 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
3714 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
3717 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3718 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3719 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3721 std::vector<char> binaryBuf;
3722 getProgramBinary(binaryBuf);
3723 if (!binaryBuf.empty())
3725 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3727 createFromBinary(ctx, binaryBuf, errmsg);
3731 return handle != NULL;
3734 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
3736 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
3739 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3740 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3741 if (log_retval == CL_SUCCESS && retsz > 1)
3743 buffer.resize(retsz + 16);
3744 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3745 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
3746 if (log_retval == CL_SUCCESS)
3748 if (retsz < buffer.size())
3751 buffer[buffer.size() - 1] = 0;
3759 errmsg = String(buffer.data());
3760 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
3761 sourceModule_.c_str(), sourceName_.c_str(),
3762 result, getOpenCLErrorString(result),
3763 buildflags.c_str(), errmsg.c_str());
3767 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3770 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
3771 CV_Assert(handle == NULL);
3772 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
3773 sourceModule_.c_str(), sourceName_.c_str(),
3774 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
3776 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
3778 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
3779 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
3780 CV_Assert(srcptr != NULL);
3781 CV_Assert(srclen > 0);
3785 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3786 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
3787 CV_Assert(handle || retval != CL_SUCCESS);
3788 if (handle && retval == CL_SUCCESS)
3790 size_t n = ctx.ndevices();
3791 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
3792 cl_device_id* deviceList = deviceListBuf.data();
3793 for (size_t i = 0; i < n; i++)
3795 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
3798 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
3799 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
3800 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3801 if (retval != CL_SUCCESS)
3804 dumpBuildLog_(retval, deviceList, errmsg);
3806 // don't remove "retval != CL_SUCCESS" condition here:
3807 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
3808 if (retval != CL_SUCCESS && handle)
3810 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3814 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3815 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3817 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
3819 char kernels_buffer[4096] = {0};
3820 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3821 if (retsz < sizeof(kernels_buffer))
3822 kernels_buffer[retsz] = 0;
3824 kernels_buffer[0] = 0;
3825 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3830 return handle != NULL;
3833 void getProgramBinary(std::vector<char>& buf)
3837 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
3839 uchar* ptr = (uchar*)&buf[0];
3840 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
3843 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
3845 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
3848 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
3850 CV_Assert(handle == NULL);
3851 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
3852 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
3854 CV_Assert(binarySize > 0);
3856 size_t ndevices = (int)ctx.ndevices();
3857 AutoBuffer<cl_device_id> devices_(ndevices);
3858 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
3859 AutoBuffer<size_t> binarySizes_(ndevices);
3861 cl_device_id* devices = devices_.data();
3862 const uchar** binaryPtrs = binaryPtrs_.data();
3863 size_t* binarySizes = binarySizes_.data();
3864 for (size_t i = 0; i < ndevices; i++)
3866 devices[i] = (cl_device_id)ctx.device(i).ptr();
3867 binaryPtrs[i] = binaryAddr;
3868 binarySizes[i] = binarySize;
3872 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
3873 binarySizes, binaryPtrs, NULL, &result);
3874 if (result != CL_SUCCESS)
3876 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
3879 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3887 // call clBuildProgram()
3889 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
3890 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
3891 if (result != CL_SUCCESS)
3893 dumpBuildLog_(result, devices, errmsg);
3896 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3902 // check build status
3904 cl_build_status build_status = CL_BUILD_NONE;
3906 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
3907 sizeof(build_status), &build_status, &retsz));
3908 if (result == CL_SUCCESS)
3910 if (build_status == CL_BUILD_SUCCESS)
3916 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
3922 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
3925 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3930 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3931 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3933 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
3935 char kernels_buffer[4096] = {0};
3936 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3937 if (retsz < sizeof(kernels_buffer))
3938 kernels_buffer[retsz] = 0;
3940 kernels_buffer[0] = 0;
3941 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3944 return handle != NULL;
3952 if (!cv::__termination)
3955 clReleaseProgram(handle);
3964 String sourceModule_;
3969 Program::Program() { p = 0; }
3971 Program::Program(const ProgramSource& src,
3972 const String& buildflags, String& errmsg)
3975 create(src, buildflags, errmsg);
3978 Program::Program(const Program& prog)
3985 Program& Program::operator = (const Program& prog)
3987 Impl* newp = (Impl*)prog.p;
4002 bool Program::create(const ProgramSource& src,
4003 const String& buildflags, String& errmsg)
4010 p = new Impl(src, buildflags, errmsg);
4019 void* Program::ptr() const
4021 return p ? p->handle : 0;
4024 #ifndef OPENCV_REMOVE_DEPRECATED_API
4025 const ProgramSource& Program::source() const
4027 CV_Error(Error::StsNotImplemented, "Removed API");
4030 bool Program::read(const String& bin, const String& buildflags)
4032 CV_UNUSED(bin); CV_UNUSED(buildflags);
4033 CV_Error(Error::StsNotImplemented, "Removed API");
4036 bool Program::write(String& bin) const
4039 CV_Error(Error::StsNotImplemented, "Removed API");
4042 String Program::getPrefix() const
4046 Context::Impl* ctx_ = Context::getDefault().getImpl();
4048 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4051 String Program::getPrefix(const String& buildflags)
4053 Context::Impl* ctx_ = Context::getDefault().getImpl();
4055 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4057 #endif // OPENCV_REMOVE_DEPRECATED_API
4059 void Program::getBinary(std::vector<char>& binary) const
4061 CV_Assert(p && "Empty program");
4062 p->getProgramBinary(binary);
4065 Program Context::Impl::getProg(const ProgramSource& src,
4066 const String& buildflags, String& errmsg)
4068 size_t limit = getProgramCountLimit();
4069 const ProgramSource::Impl* src_ = src.getImpl();
4071 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4072 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4073 getPrefixString().c_str(),
4074 buildflags.c_str());
4076 cv::AutoLock lock(program_cache_mutex);
4077 phash_t::iterator it = phash.find(key);
4078 if (it != phash.end())
4081 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4082 if (i != cacheList.end() && i != cacheList.begin())
4085 cacheList.push_front(key);
4089 { // cleanup program cache
4090 size_t sz = phash.size();
4091 if (limit > 0 && sz >= limit)
4093 static bool warningFlag = false;
4096 printf("\nWARNING: OpenCV-OpenCL:\n"
4097 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4098 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4101 while (!cacheList.empty())
4103 size_t c = phash.erase(cacheList.back());
4104 cacheList.pop_back();
4111 Program prog(src, buildflags, errmsg);
4112 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4114 cv::AutoLock lock(program_cache_mutex);
4115 phash.insert(std::pair<std::string, Program>(key, prog));
4116 cacheList.push_front(key);
4122 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4124 template<typename T>
4125 class OpenCLBufferPool
4128 ~OpenCLBufferPool() { }
4130 virtual T allocate(size_t size) = 0;
4131 virtual void release(T buffer) = 0;
4134 template <typename Derived, typename BufferEntry, typename T>
4135 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4138 inline Derived& derived() { return *static_cast<Derived*>(this); }
4142 size_t currentReservedSize;
4143 size_t maxReservedSize;
4145 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4146 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4149 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4151 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4152 for (; i != allocatedEntries_.end(); ++i)
4154 BufferEntry& e = *i;
4155 if (e.clBuffer_ == buffer)
4158 allocatedEntries_.erase(i);
4166 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4168 if (reservedEntries_.empty())
4170 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4171 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4173 size_t minDiff = (size_t)(-1);
4174 for (; i != reservedEntries_.end(); ++i)
4176 BufferEntry& e = *i;
4177 if (e.capacity_ >= size)
4179 size_t diff = e.capacity_ - size;
4180 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4190 if (result_pos != reservedEntries_.end())
4192 //CV_DbgAssert(result == *result_pos);
4193 reservedEntries_.erase(result_pos);
4195 currentReservedSize -= entry.capacity_;
4196 allocatedEntries_.push_back(entry);
4203 void _checkSizeOfReservedEntries()
4205 while (currentReservedSize > maxReservedSize)
4207 CV_DbgAssert(!reservedEntries_.empty());
4208 const BufferEntry& entry = reservedEntries_.back();
4209 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4210 currentReservedSize -= entry.capacity_;
4211 derived()._releaseBufferEntry(entry);
4212 reservedEntries_.pop_back();
4216 inline size_t _allocationGranularity(size_t size)
4219 if (size < 1024*1024)
4220 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4221 else if (size < 16*1024*1024)
4228 OpenCLBufferPoolBaseImpl()
4229 : currentReservedSize(0),
4234 virtual ~OpenCLBufferPoolBaseImpl()
4236 freeAllReservedBuffers();
4237 CV_Assert(reservedEntries_.empty());
4240 virtual T allocate(size_t size) CV_OVERRIDE
4242 AutoLock locker(mutex_);
4244 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4246 CV_DbgAssert(size <= entry.capacity_);
4247 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4251 derived()._allocateBufferEntry(entry, size);
4253 return entry.clBuffer_;
4255 virtual void release(T buffer) CV_OVERRIDE
4257 AutoLock locker(mutex_);
4259 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4260 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4262 derived()._releaseBufferEntry(entry);
4266 reservedEntries_.push_front(entry);
4267 currentReservedSize += entry.capacity_;
4268 _checkSizeOfReservedEntries();
4272 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4273 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4274 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4276 AutoLock locker(mutex_);
4277 size_t oldMaxReservedSize = maxReservedSize;
4278 maxReservedSize = size;
4279 if (maxReservedSize < oldMaxReservedSize)
4281 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4282 for (; i != reservedEntries_.end();)
4284 const BufferEntry& entry = *i;
4285 if (entry.capacity_ > maxReservedSize / 8)
4287 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4288 currentReservedSize -= entry.capacity_;
4289 derived()._releaseBufferEntry(entry);
4290 i = reservedEntries_.erase(i);
4295 _checkSizeOfReservedEntries();
4298 virtual void freeAllReservedBuffers() CV_OVERRIDE
4300 AutoLock locker(mutex_);
4301 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4302 for (; i != reservedEntries_.end(); ++i)
4304 const BufferEntry& entry = *i;
4305 derived()._releaseBufferEntry(entry);
4307 reservedEntries_.clear();
4308 currentReservedSize = 0;
4312 struct CLBufferEntry
4316 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4319 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4322 typedef struct CLBufferEntry BufferEntry;
4326 OpenCLBufferPoolImpl(int createFlags = 0)
4327 : createFlags_(createFlags)
4331 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4333 CV_DbgAssert(entry.clBuffer_ == NULL);
4334 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4335 Context& ctx = Context::getDefault();
4336 cl_int retval = CL_SUCCESS;
4337 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4338 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4339 CV_Assert(entry.clBuffer_ != NULL);
4340 if(retval == CL_SUCCESS)
4342 CV_IMPL_ADD(CV_IMPL_OCL);
4344 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4345 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4346 allocatedEntries_.push_back(entry);
4349 void _releaseBufferEntry(const BufferEntry& entry)
4351 CV_Assert(entry.capacity_ != 0);
4352 CV_Assert(entry.clBuffer_ != NULL);
4353 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4354 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4355 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4359 #ifdef HAVE_OPENCL_SVM
4360 struct CLSVMBufferEntry
4364 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4366 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4369 typedef struct CLSVMBufferEntry BufferEntry;
4371 OpenCLSVMBufferPoolImpl()
4375 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4377 CV_DbgAssert(entry.clBuffer_ == NULL);
4378 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4380 Context& ctx = Context::getDefault();
4381 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4382 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4383 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4384 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4386 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4387 CV_DbgAssert(svmFns->isValid());
4389 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4390 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4393 entry.clBuffer_ = buf;
4395 CV_IMPL_ADD(CV_IMPL_OCL);
4397 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4398 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4399 allocatedEntries_.push_back(entry);
4402 void _releaseBufferEntry(const BufferEntry& entry)
4404 CV_Assert(entry.capacity_ != 0);
4405 CV_Assert(entry.clBuffer_ != NULL);
4406 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4407 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4408 Context& ctx = Context::getDefault();
4409 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4410 CV_DbgAssert(svmFns->isValid());
4411 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4412 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4419 template <bool readAccess, bool writeAccess>
4420 class AlignedDataPtr
4424 uchar* const originPtr_;
4425 const size_t alignment_;
4427 uchar* allocatedPtr_;
4430 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
4431 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
4433 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4434 CV_DbgAssert(!readAccess || ptr);
4435 if (((size_t)ptr_ & (alignment - 1)) != 0)
4437 allocatedPtr_ = new uchar[size_ + alignment - 1];
4438 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4441 memcpy(ptr_, originPtr_, size_);
4446 uchar* getAlignedPtr() const
4448 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4458 memcpy(originPtr_, ptr_, size_);
4460 delete[] allocatedPtr_;
4461 allocatedPtr_ = NULL;
4466 AlignedDataPtr(const AlignedDataPtr&); // disabled
4467 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
4470 template <bool readAccess, bool writeAccess>
4471 class AlignedDataPtr2D
4475 uchar* const originPtr_;
4476 const size_t alignment_;
4478 uchar* allocatedPtr_;
4484 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
4485 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
4487 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4488 CV_DbgAssert(!readAccess || ptr != NULL);
4489 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
4491 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
4492 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4495 for (size_t i = 0; i < rows_; i++)
4496 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
4501 uchar* getAlignedPtr() const
4503 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4513 for (size_t i = 0; i < rows_; i++)
4514 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
4516 delete[] allocatedPtr_;
4517 allocatedPtr_ = NULL;
4522 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
4523 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
4526 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
4527 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
4530 class OpenCLAllocator CV_FINAL : public MatAllocator
4532 mutable OpenCLBufferPoolImpl bufferPool;
4533 mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
4534 #ifdef HAVE_OPENCL_SVM
4535 mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
4541 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
4542 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
4543 #ifdef HAVE_OPENCL_SVM
4544 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
4546 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
4551 bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
4553 size_t defaultPoolSize, poolSize;
4554 defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
4555 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
4556 bufferPool.setMaxReservedSize(poolSize);
4557 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
4558 bufferPoolHostPtr.setMaxReservedSize(poolSize);
4559 #ifdef HAVE_OPENCL_SVM
4560 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
4561 bufferPoolSVM.setMaxReservedSize(poolSize);
4564 matStdAllocator = Mat::getDefaultAllocator();
4568 flushCleanupQueue();
4571 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
4572 int flags, UMatUsageFlags usageFlags) const
4574 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
4578 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
4580 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
4583 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
4585 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
4589 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
4591 const Device& dev = ctx.device(0);
4593 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
4594 createFlags |= CL_MEM_ALLOC_HOST_PTR;
4596 if (!isOpenCLCopyingForced() &&
4597 (isOpenCLMapForced() ||
4598 (dev.hostUnifiedMemory()
4607 flags0 = UMatData::COPY_ON_MAP;
4610 UMatData* allocate(int dims, const int* sizes, int type,
4611 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4614 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4615 CV_Assert(data == 0);
4616 size_t total = CV_ELEM_SIZE(type);
4617 for( int i = dims-1; i >= 0; i-- )
4624 Context& ctx = Context::getDefault();
4625 flushCleanupQueue();
4627 int createFlags = 0, flags0 = 0;
4628 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
4630 void* handle = NULL;
4631 int allocatorFlags = 0;
4633 #ifdef HAVE_OPENCL_SVM
4634 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4635 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
4637 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
4638 handle = bufferPoolSVM.allocate(total);
4640 // this property is constant, so single buffer pool can be used here
4641 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4642 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4646 if (createFlags == 0)
4648 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
4649 handle = bufferPool.allocate(total);
4651 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
4653 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
4654 handle = bufferPoolHostPtr.allocate(total);
4658 CV_Assert(handle != NULL); // Unsupported, throw
4662 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4664 UMatData* u = new UMatData(this);
4669 u->allocatorFlags_ = allocatorFlags;
4670 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
4671 u->markHostCopyObsolete(true);
4672 opencl_allocator_stats.onAllocate(u->size);
4676 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4681 flushCleanupQueue();
4683 UMatDataAutoLock lock(u);
4687 CV_Assert(u->origdata != 0);
4688 Context& ctx = Context::getDefault();
4689 int createFlags = 0, flags0 = 0;
4690 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
4692 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
4694 cl_context ctx_handle = (cl_context)ctx.ptr();
4695 int allocatorFlags = 0;
4696 int tempUMatFlags = 0;
4697 void* handle = NULL;
4698 cl_int retval = CL_SUCCESS;
4700 #ifdef HAVE_OPENCL_SVM
4701 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4702 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
4703 if (useSVM && svmCaps.isSupportFineGrainSystem())
4705 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
4706 tempUMatFlags = UMatData::TEMP_UMAT;
4707 handle = u->origdata;
4708 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
4710 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
4712 if (!(accessFlags & ACCESS_FAST)) // memcpy used
4714 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4716 cl_svm_mem_flags memFlags = createFlags |
4717 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4719 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4720 CV_DbgAssert(svmFns->isValid());
4722 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
4723 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
4726 cl_command_queue q = NULL;
4727 if (!isFineGrainBuffer)
4729 q = (cl_command_queue)Queue::getDefault().ptr();
4730 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
4731 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
4734 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4737 memcpy(handle, u->origdata, u->size);
4738 if (!isFineGrainBuffer)
4740 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
4741 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
4742 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4745 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
4746 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
4747 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4754 accessFlags &= ~ACCESS_FAST;
4756 tempUMatFlags = UMatData::TEMP_UMAT;
4761 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
4762 // There are OpenCL runtime issues for less aligned data
4763 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
4764 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
4765 // Avoid sharing of host memory between OpenCL buffers
4766 && !(u->originalUMatData && u->originalUMatData->handle)
4769 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
4770 u->size, u->origdata, &retval);
4771 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
4772 (long long int)u->size, u->origdata, (void*)handle).c_str());
4774 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
4776 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
4777 u->size, u->origdata, &retval);
4778 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
4779 (long long int)u->size, u->origdata, (void*)handle).c_str());
4780 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
4783 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
4784 if(!handle || retval != CL_SUCCESS)
4787 u->prevAllocator = u->currAllocator;
4788 u->currAllocator = this;
4789 u->flags |= tempUMatFlags | flags0;
4790 u->allocatorFlags_ = allocatorFlags;
4792 if(accessFlags & ACCESS_WRITE)
4793 u->markHostCopyObsolete(true);
4794 opencl_allocator_stats.onAllocate(u->size);
4798 /*void sync(UMatData* u) const
4800 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4801 UMatDataAutoLock lock(u);
4803 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
4805 if( u->tempCopiedUMat() )
4807 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4808 u->size, u->origdata, 0, 0, 0);
4813 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4814 (CL_MAP_READ | CL_MAP_WRITE),
4815 0, u->size, 0, 0, 0, &retval);
4816 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4819 u->markHostCopyObsolete(false);
4821 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
4823 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4824 u->size, u->data, 0, 0, 0);
4828 void deallocate(UMatData* u) const CV_OVERRIDE
4833 CV_Assert(u->urefcount == 0);
4834 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
4836 CV_Assert(u->handle != 0);
4837 CV_Assert(u->mapcount == 0);
4839 if (u->flags & UMatData::ASYNC_CLEANUP)
4840 addToCleanupQueue(u);
4845 void deallocate_(UMatData* u) const
4848 CV_Assert(u->handle);
4849 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
4851 opencl_allocator_stats.onFree(u->size);
4855 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
4856 return; // avoid any OpenCL calls
4860 CV_Assert(u->origdata);
4861 // UMatDataAutoLock lock(u);
4863 if (u->hostCopyObsolete())
4865 #ifdef HAVE_OPENCL_SVM
4866 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4868 Context& ctx = Context::getDefault();
4869 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4870 CV_DbgAssert(svmFns->isValid());
4872 if( u->tempCopiedUMat() )
4874 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4875 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
4876 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
4877 cl_command_queue q = NULL;
4878 if (!isFineGrainBuffer)
4880 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
4881 q = (cl_command_queue)Queue::getDefault().ptr();
4882 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
4883 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
4886 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4889 memcpy(u->origdata, u->handle, u->size);
4890 if (!isFineGrainBuffer)
4892 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
4893 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
4894 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4899 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
4906 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4907 if( u->tempCopiedUMat() )
4909 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
4910 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4911 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
4918 CV_Assert(u->mapcount == 0);
4919 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
4920 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4921 (CL_MAP_READ | CL_MAP_WRITE),
4922 0, u->size, 0, 0, 0, &retval);
4923 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
4924 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
4925 if (u->originalUMatData)
4927 CV_Assert(u->originalUMatData->data == data);
4929 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4930 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
4931 CV_OCL_DBG_CHECK(clFinish(q));
4935 u->markHostCopyObsolete(false);
4941 #ifdef HAVE_OPENCL_SVM
4942 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4944 if( u->tempCopiedUMat() )
4946 Context& ctx = Context::getDefault();
4947 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4948 CV_DbgAssert(svmFns->isValid());
4950 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
4951 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
4957 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
4958 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
4961 u->markDeviceCopyObsolete(true);
4962 u->currAllocator = u->prevAllocator;
4963 u->prevAllocator = NULL;
4964 if(u->data && u->copyOnMap() && u->data != u->origdata)
4966 u->data = u->origdata;
4967 u->currAllocator->deallocate(u);
4972 CV_Assert(u->origdata == NULL);
4973 if(u->data && u->copyOnMap() && u->data != u->origdata)
4977 u->markHostCopyObsolete(true);
4979 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
4981 bufferPool.release((cl_mem)u->handle);
4983 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
4985 bufferPoolHostPtr.release((cl_mem)u->handle);
4987 #ifdef HAVE_OPENCL_SVM
4988 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
4990 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
4994 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4995 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
4997 Context& ctx = Context::getDefault();
4998 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4999 CV_DbgAssert(svmFns->isValid());
5000 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5002 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
5004 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5005 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
5006 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5009 bufferPoolSVM.release((void*)u->handle);
5014 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
5017 u->markDeviceCopyObsolete(true);
5021 CV_Assert(u == NULL);
5024 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
5025 void map(UMatData* u, int accessFlags) const CV_OVERRIDE
5027 CV_Assert(u && u->handle);
5029 if(accessFlags & ACCESS_WRITE)
5030 u->markDeviceCopyObsolete(true);
5032 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5035 if( !u->copyOnMap() )
5038 // because there can be other map requests for the same UMat with different access flags,
5039 // we use the universal (read-write) access mode.
5040 #ifdef HAVE_OPENCL_SVM
5041 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5043 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5045 Context& ctx = Context::getDefault();
5046 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5047 CV_DbgAssert(svmFns->isValid());
5049 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5051 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5052 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5055 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5056 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5060 u->data = (uchar*)u->handle;
5061 u->markHostCopyObsolete(false);
5062 u->markDeviceMemMapped(true);
5067 cl_int retval = CL_SUCCESS;
5068 if (!u->deviceMemMapped())
5070 CV_Assert(u->refcount == 1);
5071 CV_Assert(u->mapcount++ == 0);
5072 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5073 (CL_MAP_READ | CL_MAP_WRITE),
5074 0, u->size, 0, 0, 0, &retval);
5075 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5077 if (u->data && retval == CL_SUCCESS)
5079 u->markHostCopyObsolete(false);
5080 u->markDeviceMemMapped(true);
5084 // TODO Is it really a good idea and was it tested well?
5085 // if map failed, switch to copy-on-map mode for the particular buffer
5086 u->flags |= UMatData::COPY_ON_MAP;
5091 u->data = (uchar*)fastMalloc(u->size);
5092 u->markHostCopyObsolete(true);
5096 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
5098 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5099 #ifdef HAVE_OPENCL_SVM
5100 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5102 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5103 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5104 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5105 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5106 u->markHostCopyObsolete(false);
5110 void unmap(UMatData* u) const CV_OVERRIDE
5116 CV_Assert(u->handle != 0);
5118 UMatDataAutoLock autolock(u);
5120 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5122 if( !u->copyOnMap() && u->deviceMemMapped() )
5124 CV_Assert(u->data != NULL);
5125 #ifdef HAVE_OPENCL_SVM
5126 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5128 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5130 Context& ctx = Context::getDefault();
5131 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5132 CV_DbgAssert(svmFns->isValid());
5134 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5136 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5137 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5139 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5141 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5144 if (u->refcount == 0)
5146 u->markDeviceCopyObsolete(false);
5147 u->markHostCopyObsolete(true);
5151 if (u->refcount == 0)
5153 CV_Assert(u->mapcount-- == 1);
5154 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5155 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5156 if (Device::getDefault().isAMD())
5158 // required for multithreaded applications (see stitching test)
5159 CV_OCL_DBG_CHECK(clFinish(q));
5161 u->markDeviceMemMapped(false);
5163 u->markDeviceCopyObsolete(false);
5164 u->markHostCopyObsolete(true);
5167 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5169 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5170 #ifdef HAVE_OPENCL_SVM
5171 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5173 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5174 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5175 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5176 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5177 u->markDeviceCopyObsolete(false);
5178 u->markHostCopyObsolete(true);
5182 bool checkContinuous(int dims, const size_t sz[],
5183 const size_t srcofs[], const size_t srcstep[],
5184 const size_t dstofs[], const size_t dststep[],
5185 size_t& total, size_t new_sz[],
5186 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5187 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5189 bool iscontinuous = true;
5190 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5191 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5193 for( int i = dims-2; i >= 0; i-- )
5195 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5196 iscontinuous = false;
5199 srcrawofs += srcofs[i]*srcstep[i];
5201 dstrawofs += dstofs[i]*dststep[i];
5206 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5209 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5210 // we assume that new_... arrays are initialized by caller
5211 // with 0's, so there is no else branch
5214 new_srcofs[0] = srcofs[1];
5215 new_srcofs[1] = srcofs[0];
5221 new_dstofs[0] = dstofs[1];
5222 new_dstofs[1] = dstofs[0];
5226 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5227 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5231 // we could check for dims == 3 here,
5232 // but from user perspective this one is more informative
5233 CV_Assert(dims <= 3);
5234 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5237 new_srcofs[0] = srcofs[2];
5238 new_srcofs[1] = srcofs[1];
5239 new_srcofs[2] = srcofs[0];
5244 new_dstofs[0] = dstofs[2];
5245 new_dstofs[1] = dstofs[1];
5246 new_dstofs[2] = dstofs[0];
5249 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5250 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5253 return iscontinuous;
5256 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5257 const size_t srcofs[], const size_t srcstep[],
5258 const size_t dststep[]) const CV_OVERRIDE
5262 UMatDataAutoLock autolock(u);
5264 if( u->data && !u->hostCopyObsolete() )
5266 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5269 CV_Assert( u->handle != 0 );
5271 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5273 size_t total = 0, new_sz[] = {0, 0, 0};
5274 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5275 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5277 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5279 srcrawofs, new_srcofs, new_srcstep,
5280 dstrawofs, new_dstofs, new_dststep);
5282 #ifdef HAVE_OPENCL_SVM
5283 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5285 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5286 Context& ctx = Context::getDefault();
5287 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5288 CV_DbgAssert(svmFns->isValid());
5290 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5291 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5293 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5294 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5297 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5302 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5306 // This code is from MatAllocator::download()
5307 int isz[CV_MAX_DIM];
5308 uchar* srcptr = (uchar*)u->handle;
5309 for( int i = 0; i < dims; i++ )
5311 CV_Assert( sz[i] <= (size_t)INT_MAX );
5315 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5316 isz[i] = (int)sz[i];
5319 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5320 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5322 const Mat* arrays[] = { &src, &dst };
5324 NAryMatIterator it(arrays, ptrs, 2);
5325 size_t j, planesz = it.size;
5327 for( j = 0; j < it.nplanes; j++, ++it )
5328 memcpy(ptrs[1], ptrs[0], planesz);
5330 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5332 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5333 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5335 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5344 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5345 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5346 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5348 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5350 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5351 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5352 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5353 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5354 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5355 uchar* ptr = alignedPtr.getAlignedPtr();
5357 CV_Assert(new_srcstep[0] >= new_sz[0]);
5358 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5359 total = std::min(total, u->size - new_srcrawofs);
5360 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5361 new_srcrawofs, total, ptr, 0, 0, 0));
5362 for( size_t i = 0; i < new_sz[1]; i++ )
5363 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5367 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5368 uchar* ptr = alignedPtr.getAlignedPtr();
5370 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5371 new_srcofs, new_dstofs, new_sz,
5379 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5380 const size_t dstofs[], const size_t dststep[],
5381 const size_t srcstep[]) const CV_OVERRIDE
5386 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5387 CV_Assert(u->refcount == 0 || u->tempUMat());
5389 size_t total = 0, new_sz[] = {0, 0, 0};
5390 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5391 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5393 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5395 srcrawofs, new_srcofs, new_srcstep,
5396 dstrawofs, new_dstofs, new_dststep);
5398 UMatDataAutoLock autolock(u);
5400 // if there is cached CPU copy of the GPU matrix,
5401 // we could use it as a destination.
5402 // we can do it in 2 cases:
5403 // 1. we overwrite the whole content
5404 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
5405 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
5407 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
5408 u->markHostCopyObsolete(false);
5409 u->markDeviceCopyObsolete(true);
5413 CV_Assert( u->handle != 0 );
5414 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5416 #ifdef HAVE_OPENCL_SVM
5417 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5419 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5420 Context& ctx = Context::getDefault();
5421 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5422 CV_DbgAssert(svmFns->isValid());
5424 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5425 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5427 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5428 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
5431 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5436 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
5440 // This code is from MatAllocator::upload()
5441 int isz[CV_MAX_DIM];
5442 uchar* dstptr = (uchar*)u->handle;
5443 for( int i = 0; i < dims; i++ )
5445 CV_Assert( sz[i] <= (size_t)INT_MAX );
5449 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5450 isz[i] = (int)sz[i];
5453 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
5454 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5456 const Mat* arrays[] = { &src, &dst };
5458 NAryMatIterator it(arrays, ptrs, 2);
5459 size_t j, planesz = it.size;
5461 for( j = 0; j < it.nplanes; j++, ++it )
5462 memcpy(ptrs[1], ptrs[0], planesz);
5464 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5466 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5467 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5469 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5478 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5479 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5480 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
5481 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
5482 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5484 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5486 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5487 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5488 size_t membuf_ofs = dstrawofs - new_dstrawofs;
5489 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
5490 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5491 uchar* ptr = alignedPtr.getAlignedPtr();
5493 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5494 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
5495 total = std::min(total, u->size - new_dstrawofs);
5496 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
5497 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
5498 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
5499 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5500 new_dstrawofs, total, ptr, 0, 0, 0));
5501 for( size_t i = 0; i < new_sz[1]; i++ )
5502 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
5503 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5504 new_dstrawofs, total, ptr, 0, 0, 0));
5508 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5509 uchar* ptr = alignedPtr.getAlignedPtr();
5511 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5512 new_dstofs, new_srcofs, new_sz,
5518 u->markHostCopyObsolete(true);
5519 #ifdef HAVE_OPENCL_SVM
5520 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5521 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5528 u->markHostCopyObsolete(true);
5530 u->markDeviceCopyObsolete(false);
5533 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
5534 const size_t srcofs[], const size_t srcstep[],
5535 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
5540 size_t total = 0, new_sz[] = {0, 0, 0};
5541 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5542 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5544 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
5546 srcrawofs, new_srcofs, new_srcstep,
5547 dstrawofs, new_dstofs, new_dststep);
5549 UMatDataAutoLock src_autolock(src, dst);
5551 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
5553 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5556 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
5558 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5559 dst->markHostCopyObsolete(false);
5560 #ifdef HAVE_OPENCL_SVM
5561 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5562 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5569 dst->markDeviceCopyObsolete(true);
5574 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5575 CV_Assert(dst->refcount == 0);
5576 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5578 cl_int retval = CL_SUCCESS;
5579 #ifdef HAVE_OPENCL_SVM
5580 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
5581 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5583 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
5584 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5586 Context& ctx = Context::getDefault();
5587 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5588 CV_DbgAssert(svmFns->isValid());
5592 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
5593 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
5594 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
5595 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
5596 total, 0, NULL, NULL);
5597 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
5602 // This code is from MatAllocator::download()/upload()
5603 int isz[CV_MAX_DIM];
5604 uchar* srcptr = (uchar*)src->handle;
5605 for( int i = 0; i < dims; i++ )
5607 CV_Assert( sz[i] <= (size_t)INT_MAX );
5611 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5612 isz[i] = (int)sz[i];
5614 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
5616 uchar* dstptr = (uchar*)dst->handle;
5617 for( int i = 0; i < dims; i++ )
5620 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5622 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
5624 const Mat* arrays[] = { &m_src, &m_dst };
5626 NAryMatIterator it(arrays, ptrs, 2);
5627 size_t j, planesz = it.size;
5629 for( j = 0; j < it.nplanes; j++, ++it )
5630 memcpy(ptrs[1], ptrs[0], planesz);
5635 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5637 map(src, ACCESS_READ);
5638 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5643 map(dst, ACCESS_WRITE);
5644 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5654 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5655 srcrawofs, dstrawofs, total, 0, 0, 0);
5656 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
5657 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
5659 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5661 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5662 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5663 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
5664 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5665 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
5667 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5668 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5669 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
5670 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5671 uchar* srcptr = srcBuf.getAlignedPtr();
5672 uchar* dstptr = dstBuf.getAlignedPtr();
5674 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5676 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
5677 src_total = std::min(src_total, src->size - new_srcrawofs);
5678 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
5679 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
5681 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
5682 new_srcrawofs, src_total, srcptr, 0, 0, 0));
5683 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5684 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5686 for( size_t i = 0; i < new_sz[1]; i++ )
5687 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
5688 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
5689 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5690 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5694 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5695 new_srcofs, new_dstofs, new_sz,
5701 if (retval == CL_SUCCESS)
5703 CV_IMPL_ADD(CV_IMPL_OCL)
5706 #ifdef HAVE_OPENCL_SVM
5707 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5708 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5715 dst->markHostCopyObsolete(true);
5717 dst->markDeviceCopyObsolete(false);
5721 CV_OCL_DBG_CHECK(clFinish(q));
5725 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE {
5726 #ifdef HAVE_OPENCL_SVM
5727 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
5729 return &bufferPoolSVM;
5732 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
5734 return &bufferPoolHostPtr;
5736 if (id != NULL && strcmp(id, "OCL") != 0)
5738 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
5743 MatAllocator* matStdAllocator;
5745 mutable cv::Mutex cleanupQueueMutex;
5746 mutable std::deque<UMatData*> cleanupQueue;
5748 void flushCleanupQueue() const
5750 if (!cleanupQueue.empty())
5752 std::deque<UMatData*> q;
5754 cv::AutoLock lock(cleanupQueueMutex);
5755 q.swap(cleanupQueue);
5757 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
5763 void addToCleanupQueue(UMatData* u) const
5765 //TODO: Validation check: CV_Assert(!u->tempUMat());
5767 cv::AutoLock lock(cleanupQueueMutex);
5768 cleanupQueue.push_back(u);
5773 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
5775 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
5776 g_isOpenCVActivated = true;
5779 MatAllocator* getOpenCLAllocator()
5781 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
5784 }} // namespace cv::ocl
5789 // three funcs below are implemented in umatrix.cpp
5790 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
5791 bool autoSteps = false );
5792 void finalizeHdr(UMat& m);
5797 namespace cv { namespace ocl {
5800 // Convert OpenCL buffer memory to UMat
5802 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
5805 int sizes[] = { rows, cols };
5807 CV_Assert(0 <= d && d <= CV_MAX_DIM);
5811 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
5812 dst.usageFlags = USAGE_DEFAULT;
5814 setSize(dst, d, sizes, 0, true);
5817 cl_mem memobj = (cl_mem)cl_mem_buffer;
5818 cl_mem_object_type mem_type = 0;
5820 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5822 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
5825 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
5827 CV_OCL_CHECK(clRetainMemObject(memobj));
5829 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
5830 CV_Assert(total >= rows * step);
5832 // attach clBuffer to UMatData
5833 dst.u = new UMatData(getOpenCLAllocator());
5835 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
5837 dst.u->handle = cl_mem_buffer;
5838 dst.u->origdata = 0;
5839 dst.u->prevAllocator = 0;
5840 dst.u->size = total;
5846 } // convertFromBuffer()
5850 // Convert OpenCL image2d_t memory to UMat
5852 void convertFromImage(void* cl_mem_image, UMat& dst)
5854 cl_mem clImage = (cl_mem)cl_mem_image;
5855 cl_mem_object_type mem_type = 0;
5857 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5859 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
5861 cl_image_format fmt = { 0, 0 };
5862 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
5865 switch (fmt.image_channel_data_type)
5868 case CL_UNSIGNED_INT8:
5873 case CL_SIGNED_INT8:
5877 case CL_UNORM_INT16:
5878 case CL_UNSIGNED_INT16:
5882 case CL_SNORM_INT16:
5883 case CL_SIGNED_INT16:
5887 case CL_SIGNED_INT32:
5896 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
5900 switch (fmt.image_channel_order)
5903 type = CV_MAKE_TYPE(depth, 1);
5909 type = CV_MAKE_TYPE(depth, 4);
5913 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
5918 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
5921 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
5924 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
5926 dst.create((int)h, (int)w, type);
5928 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
5930 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5933 size_t src_origin[3] = { 0, 0, 0 };
5934 size_t region[3] = { w, h, 1 };
5935 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
5937 CV_OCL_CHECK(clFinish(q));
5940 } // convertFromImage()
5943 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
5945 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
5947 cl_uint numDevices = 0;
5948 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
5949 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
5951 CV_OCL_DBG_CHECK_RESULT(status,
5952 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
5955 if (numDevices == 0)
5961 devices.resize((size_t)numDevices);
5962 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
5965 struct PlatformInfo::Impl
5970 handle = *(cl_platform_id*)id;
5971 getDevices(devices, handle);
5973 version_ = getStrProp(CL_PLATFORM_VERSION);
5974 parseOpenCLVersion(version_, versionMajor_, versionMinor_);
5977 String getStrProp(cl_platform_info prop) const
5981 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
5982 sz < sizeof(buf) ? String(buf) : String();
5985 IMPLEMENT_REFCOUNTABLE();
5986 std::vector<cl_device_id> devices;
5987 cl_platform_id handle;
5994 PlatformInfo::PlatformInfo()
5999 PlatformInfo::PlatformInfo(void* platform_id)
6001 p = new Impl(platform_id);
6004 PlatformInfo::~PlatformInfo()
6010 PlatformInfo::PlatformInfo(const PlatformInfo& i)
6017 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
6030 int PlatformInfo::deviceNumber() const
6032 return p ? (int)p->devices.size() : 0;
6035 void PlatformInfo::getDevice(Device& device, int d) const
6037 CV_Assert(p && d < (int)p->devices.size() );
6039 device.set(p->devices[d]);
6042 String PlatformInfo::name() const
6044 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
6047 String PlatformInfo::vendor() const
6049 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6052 String PlatformInfo::version() const
6054 return p ? p->version_ : String();
6057 int PlatformInfo::versionMajor() const
6060 return p->versionMajor_;
6063 int PlatformInfo::versionMinor() const
6066 return p->versionMinor_;
6069 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6071 cl_uint numPlatforms = 0;
6072 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6074 if (numPlatforms == 0)
6080 platforms.resize((size_t)numPlatforms);
6081 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6084 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6086 std::vector<cl_platform_id> platforms;
6087 getPlatforms(platforms);
6089 for (size_t i = 0; i < platforms.size(); i++)
6090 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6093 const char* typeToStr(int type)
6095 static const char* tab[]=
6097 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6098 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6099 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6100 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6101 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6102 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6103 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6104 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6106 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6107 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6110 const char* memopTypeToStr(int type)
6112 static const char* tab[] =
6114 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6115 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6116 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6117 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6118 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6119 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6120 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6121 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6123 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6124 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6127 const char* vecopTypeToStr(int type)
6129 static const char* tab[] =
6131 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6132 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6133 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6134 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6135 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6136 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6137 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6138 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6140 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6141 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6144 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6146 if( sdepth == ddepth )
6148 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6149 if( ddepth >= CV_32F ||
6150 (ddepth == CV_32S && sdepth < CV_32S) ||
6151 (ddepth == CV_16S && sdepth <= CV_8S) ||
6152 (ddepth == CV_16U && sdepth == CV_8U))
6154 sprintf(buf, "convert_%s", typestr);
6156 else if( sdepth >= CV_32F )
6157 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6159 sprintf(buf, "convert_%s_sat", typestr);
6164 const char* getOpenCLErrorString(int errorCode)
6166 #define CV_OCL_CODE(id) case id: return #id
6167 #define CV_OCL_CODE_(id, name) case id: return #name
6170 CV_OCL_CODE(CL_SUCCESS);
6171 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6172 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6173 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6174 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6175 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6176 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6177 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6178 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6179 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6180 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6181 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6182 CV_OCL_CODE(CL_MAP_FAILURE);
6183 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6184 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6185 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6186 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6187 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6188 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6189 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6190 CV_OCL_CODE(CL_INVALID_VALUE);
6191 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6192 CV_OCL_CODE(CL_INVALID_PLATFORM);
6193 CV_OCL_CODE(CL_INVALID_DEVICE);
6194 CV_OCL_CODE(CL_INVALID_CONTEXT);
6195 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6196 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6197 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6198 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6199 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6200 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6201 CV_OCL_CODE(CL_INVALID_SAMPLER);
6202 CV_OCL_CODE(CL_INVALID_BINARY);
6203 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6204 CV_OCL_CODE(CL_INVALID_PROGRAM);
6205 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6206 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6207 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6208 CV_OCL_CODE(CL_INVALID_KERNEL);
6209 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6210 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6211 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6212 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6213 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6214 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6215 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6216 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6217 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6218 CV_OCL_CODE(CL_INVALID_EVENT);
6219 CV_OCL_CODE(CL_INVALID_OPERATION);
6220 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6221 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6222 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6223 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6225 CV_OCL_CODE(CL_INVALID_PROPERTY);
6227 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6228 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6229 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6230 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6232 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6233 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6235 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6236 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6237 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6238 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6239 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6240 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6241 default: return "Unknown OpenCL error";
6247 template <typename T>
6248 static std::string kerToStr(const Mat & k)
6250 int width = k.cols - 1, depth = k.depth();
6251 const T * const data = k.ptr<T>();
6253 std::ostringstream stream;
6254 stream.precision(10);
6258 for (int i = 0; i < width; ++i)
6259 stream << "DIG(" << (int)data[i] << ")";
6260 stream << "DIG(" << (int)data[width] << ")";
6262 else if (depth == CV_32F)
6264 stream.setf(std::ios_base::showpoint);
6265 for (int i = 0; i < width; ++i)
6266 stream << "DIG(" << data[i] << "f)";
6267 stream << "DIG(" << data[width] << "f)";
6271 for (int i = 0; i < width; ++i)
6272 stream << "DIG(" << data[i] << ")";
6273 stream << "DIG(" << data[width] << ")";
6276 return stream.str();
6279 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6281 Mat kernel = _kernel.getMat().reshape(1, 1);
6283 int depth = kernel.depth();
6287 if (ddepth != depth)
6288 kernel.convertTo(kernel, ddepth);
6290 typedef std::string (* func_t)(const Mat &);
6291 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6292 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6293 const func_t func = funcs[ddepth];
6294 CV_Assert(func != 0);
6296 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6299 #define PROCESS_SRC(src) \
6304 CV_Assert(src.isMat() || src.isUMat()); \
6305 Size csize = src.size(); \
6306 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6307 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6308 if (cwidth < ckercn || ckercn <= 0) \
6310 cols.push_back(cwidth); \
6311 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6313 offsets.push_back(src.offset()); \
6314 steps.push_back(src.step()); \
6315 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6316 kercns.push_back(ckercn); \
6321 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6322 InputArray src4, InputArray src5, InputArray src6,
6323 InputArray src7, InputArray src8, InputArray src9,
6324 OclVectorStrategy strat)
6326 const ocl::Device & d = ocl::Device::getDefault();
6328 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6329 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6330 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6331 d.preferredVectorWidthDouble(), -1 };
6333 // if the device says don't use vectors
6334 if (vectorWidths[0] == 1)
6337 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6338 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6339 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6342 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6345 int checkOptimalVectorWidth(const int *vectorWidths,
6346 InputArray src1, InputArray src2, InputArray src3,
6347 InputArray src4, InputArray src5, InputArray src6,
6348 InputArray src7, InputArray src8, InputArray src9,
6349 OclVectorStrategy strat)
6351 CV_Assert(vectorWidths);
6353 int ref_type = src1.type();
6355 std::vector<size_t> offsets, steps, cols;
6356 std::vector<int> dividers, kercns;
6367 size_t size = offsets.size();
6369 for (size_t i = 0; i < size; ++i)
6370 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6371 dividers[i] >>= 1, kercns[i] >>= 1;
6374 int kercn = *std::min_element(kercns.begin(), kercns.end());
6379 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6380 InputArray src4, InputArray src5, InputArray src6,
6381 InputArray src7, InputArray src8, InputArray src9)
6383 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
6389 // TODO Make this as a method of OpenCL "BuildOptions" class
6390 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
6392 if (!buildOptions.empty())
6393 buildOptions += " ";
6394 int type = _m.type(), depth = CV_MAT_DEPTH(type);
6395 buildOptions += format(
6396 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
6397 name.c_str(), ocl::typeToStr(type),
6398 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
6399 name.c_str(), (int)CV_MAT_CN(type),
6400 name.c_str(), (int)CV_ELEM_SIZE(type),
6401 name.c_str(), (int)CV_ELEM_SIZE1(type),
6402 name.c_str(), (int)depth
6407 struct Image2D::Impl
6409 Impl(const UMat &src, bool norm, bool alias)
6413 init(src, norm, alias);
6419 clReleaseMemObject(handle);
6422 static cl_image_format getImageFormat(int depth, int cn, bool norm)
6424 cl_image_format format;
6425 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
6426 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
6427 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
6428 CL_SNORM_INT16, -1, -1, -1, -1 };
6429 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
6431 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
6432 int channelOrder = channelOrders[cn];
6433 format.image_channel_data_type = (cl_channel_type)channelType;
6434 format.image_channel_order = (cl_channel_order)channelOrder;
6438 static bool isFormatSupported(cl_image_format format)
6441 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6443 cl_context context = (cl_context)Context::getDefault().ptr();
6447 // Figure out how many formats are supported by this context.
6448 cl_uint numFormats = 0;
6449 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6450 CL_MEM_OBJECT_IMAGE2D, numFormats,
6452 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
6455 AutoBuffer<cl_image_format> formats(numFormats);
6456 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6457 CL_MEM_OBJECT_IMAGE2D, numFormats,
6458 formats.data(), NULL);
6459 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
6460 for (cl_uint i = 0; i < numFormats; ++i)
6462 if (!memcmp(&formats[i], &format, sizeof(format)))
6471 void init(const UMat &src, bool norm, bool alias)
6474 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6476 CV_Assert(!src.empty());
6477 CV_Assert(ocl::Device::getDefault().imageSupport());
6479 int err, depth = src.depth(), cn = src.channels();
6481 cl_image_format format = getImageFormat(depth, cn, norm);
6483 if (!isFormatSupported(format))
6484 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
6486 if (alias && !src.handle(ACCESS_RW))
6487 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
6489 cl_context context = (cl_context)Context::getDefault().ptr();
6490 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
6492 #ifdef CL_VERSION_1_2
6493 // this enables backwards portability to
6494 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
6495 const Device & d = ocl::Device::getDefault();
6496 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
6497 CV_Assert(!alias || canCreateAlias(src));
6498 if (1 < major || (1 == major && 2 <= minor))
6501 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
6502 desc.image_width = src.cols;
6503 desc.image_height = src.rows;
6504 desc.image_depth = 0;
6505 desc.image_array_size = 1;
6506 desc.image_row_pitch = alias ? src.step[0] : 0;
6507 desc.image_slice_pitch = 0;
6508 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
6509 desc.num_mip_levels = 0;
6510 desc.num_samples = 0;
6511 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
6516 CV_SUPPRESS_DEPRECATED_START
6517 CV_Assert(!alias); // This is an OpenCL 1.2 extension
6518 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
6519 CV_SUPPRESS_DEPRECATED_END
6521 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
6523 size_t origin[] = { 0, 0, 0 };
6524 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
6527 if (!alias && !src.isContinuous())
6529 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
6530 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
6531 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
6534 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
6535 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
6536 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
6537 CV_OCL_DBG_CHECK(clFlush(queue));
6541 devData = (cl_mem)src.handle(ACCESS_READ);
6543 CV_Assert(devData != NULL);
6547 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
6548 if (!src.isContinuous())
6550 CV_OCL_DBG_CHECK(clFlush(queue));
6551 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
6556 IMPLEMENT_REFCOUNTABLE();
6566 Image2D::Image2D(const UMat &src, bool norm, bool alias)
6568 p = new Impl(src, norm, alias);
6571 bool Image2D::canCreateAlias(const UMat &m)
6574 const Device & d = ocl::Device::getDefault();
6575 if (d.imageFromBufferSupport() && !m.empty())
6577 // This is the required pitch alignment in pixels
6578 uint pitchAlign = d.imagePitchAlignment();
6579 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
6581 // We don't currently handle the case where the buffer was created
6582 // with CL_MEM_USE_HOST_PTR
6583 if (!m.u->tempUMat())
6592 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
6594 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
6596 return Impl::isFormatSupported(format);
6599 Image2D::Image2D(const Image2D & i)
6606 Image2D & Image2D::operator = (const Image2D & i)
6625 void* Image2D::ptr() const
6627 return p ? p->handle : 0;
6630 bool internal::isOpenCLForced()
6632 static bool initialized = false;
6633 static bool value = false;
6636 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
6642 bool internal::isPerformanceCheckBypassed()
6644 static bool initialized = false;
6645 static bool value = false;
6648 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
6654 bool internal::isCLBuffer(UMat& u)
6656 void* h = u.handle(ACCESS_RW);
6659 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
6661 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
6664 cl_mem_object_type type = 0;
6665 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
6666 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
6676 Impl(const Queue& q)
6685 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6691 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6695 uint64 durationNS() const
6697 return (uint64)(timer.getTimeSec() * 1e9);
6703 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
6704 Timer::~Timer() { delete p; }
6718 uint64 Timer::durationNS() const
6721 return p->durationNS();
6726 #endif // HAVE_OPENCL