1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
42 #include "precomp.hpp"
45 #include "ocl_disabled.impl.hpp"
54 #include <iostream> // std::cerr
56 #if !(defined _MSC_VER) || (defined _MSC_VER && _MSC_VER > 1700)
60 #include <opencv2/core/utils/configuration.private.hpp>
62 #include <opencv2/core/utils/logger.defines.hpp>
63 #undef CV_LOG_STRIP_LEVEL
64 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
65 #include <opencv2/core/utils/logger.hpp>
67 #include "opencv2/core/ocl_genbase.hpp"
68 #include "opencl_kernels_core.hpp"
70 #include "opencv2/core/utils/lock.private.hpp"
71 #include "opencv2/core/utils/filesystem.hpp"
72 #include "opencv2/core/utils/filesystem.private.hpp"
74 #define CV__ALLOCATOR_STATS_LOG(...) CV_LOG_VERBOSE(NULL, 0, "OpenCL allocator: " << __VA_ARGS__)
75 #include "opencv2/core/utils/allocator_stats.impl.hpp"
76 #undef CV__ALLOCATOR_STATS_LOG
78 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
80 #define CV_OPENCL_SHOW_RUN_KERNELS 0
81 #define CV_OPENCL_TRACE_CHECK 0
83 #define CV_OPENCL_VALIDATE_BINARY_PROGRAMS 1
85 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
86 #define CV_OPENCL_SHOW_SVM_LOG 0
88 #include "opencv2/core/bufferpool.hpp"
89 #ifndef LOG_BUFFER_POOL
91 # define LOG_BUFFER_POOL printf
93 # define LOG_BUFFER_POOL(...)
97 #if CV_OPENCL_SHOW_SVM_LOG
98 // TODO add timestamp logging
99 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
101 #define CV_OPENCL_SVM_TRACE_P(...)
104 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
105 // TODO add timestamp logging
106 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
108 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
111 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
112 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
114 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
116 #ifdef HAVE_OPENCL_SVM
117 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
118 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
119 #include "opencv2/core/opencl/opencl_svm.hpp"
122 #include "umatrix.hpp"
124 namespace cv { namespace ocl {
126 #define IMPLEMENT_REFCOUNTABLE() \
127 void addref() { CV_XADD(&refcount, 1); } \
128 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
131 static cv::utils::AllocatorStatistics opencl_allocator_stats;
133 CV_EXPORTS cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics();
134 cv::utils::AllocatorStatisticsInterface& getOpenCLAllocatorStatistics()
136 return opencl_allocator_stats;
140 static bool isRaiseError()
142 static bool initialized = false;
143 static bool value = false;
146 value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
153 #if CV_OPENCL_TRACE_CHECK
155 void traceOpenCLCheck(cl_int status, const char* message)
157 std::cout << "OpenCV(OpenCL:" << status << "): " << message << std::endl << std::flush;
159 #define CV_OCL_TRACE_CHECK_RESULT(status, message) traceOpenCLCheck(status, message)
161 #define CV_OCL_TRACE_CHECK_RESULT(status, message) /* nothing */
164 #define CV_OCL_API_ERROR_MSG(check_result, msg) \
165 cv::format("OpenCL error %s (%d) during call: %s", getOpenCLErrorString(check_result), check_result, msg)
167 #define CV_OCL_CHECK_RESULT(check_result, msg) \
169 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
170 if (check_result != CL_SUCCESS) \
172 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
173 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
174 CV_Error(Error::OpenCLApiCallError, error_msg); \
178 #define CV_OCL_CHECK_(expr, check_result) do { expr; CV_OCL_CHECK_RESULT(check_result, #expr); } while (0)
180 #define CV_OCL_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_CHECK_RESULT(__cl_result, #expr); } while (0)
183 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) CV_OCL_CHECK_RESULT(check_result, msg)
184 #define CV_OCL_DBG_CHECK(expr) CV_OCL_CHECK(expr)
185 #define CV_OCL_DBG_CHECK_(expr, check_result) CV_OCL_CHECK_(expr, check_result)
187 #define CV_OCL_DBG_CHECK_RESULT(check_result, msg) \
189 CV_OCL_TRACE_CHECK_RESULT(check_result, msg); \
190 if (check_result != CL_SUCCESS && isRaiseError()) \
192 if (0) { const char* msg_ = (msg); CV_UNUSED(msg_); /* ensure const char* type (cv::String without c_str()) */ } \
193 cv::String error_msg = CV_OCL_API_ERROR_MSG(check_result, msg); \
194 CV_Error(Error::OpenCLApiCallError, error_msg); \
197 #define CV_OCL_DBG_CHECK_(expr, check_result) do { expr; CV_OCL_DBG_CHECK_RESULT(check_result, #expr); } while (0)
198 #define CV_OCL_DBG_CHECK(expr) do { cl_int __cl_result = (expr); CV_OCL_DBG_CHECK_RESULT(__cl_result, #expr); } while (0)
202 static const bool CV_OPENCL_CACHE_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_ENABLE", true);
203 static const bool CV_OPENCL_CACHE_WRITE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_WRITE", true);
204 static const bool CV_OPENCL_CACHE_LOCK_ENABLE = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_LOCK_ENABLE", true);
205 static const bool CV_OPENCL_CACHE_CLEANUP = utils::getConfigurationParameterBool("OPENCV_OPENCL_CACHE_CLEANUP", true);
207 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
208 static const bool CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE = utils::getConfigurationParameterBool("OPENCV_OPENCL_VALIDATE_BINARY_PROGRAMS", false);
211 // Option to disable calls clEnqueueReadBufferRect / clEnqueueWriteBufferRect / clEnqueueCopyBufferRect
212 static const bool CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS = utils::getConfigurationParameterBool("OPENCV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS",
220 static const String getBuildExtraOptions()
222 static String param_buildExtraOptions;
223 static bool initialized = false;
226 param_buildExtraOptions = utils::getConfigurationParameterString("OPENCV_OPENCL_BUILD_EXTRA_OPTIONS", "");
228 if (!param_buildExtraOptions.empty())
229 CV_LOG_WARNING(NULL, "OpenCL: using extra build options: '" << param_buildExtraOptions << "'");
231 return param_buildExtraOptions;
234 static const bool CV_OPENCL_ENABLE_MEM_USE_HOST_PTR = utils::getConfigurationParameterBool("OPENCV_OPENCL_ENABLE_MEM_USE_HOST_PTR", true);
235 static const size_t CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR", 4);
240 UMat2D(const UMat& m)
242 offset = (int)m.offset;
255 UMat3D(const UMat& m)
257 offset = (int)m.offset;
258 step = (int)m.step.p[1];
259 slicestep = (int)m.step.p[0];
260 slices = (int)m.size.p[0];
272 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
273 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
275 static uint64 table[256];
276 static bool initialized = false;
280 for( int i = 0; i < 256; i++ )
283 for( int j = 0; j < 8; j++ )
284 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
291 for( size_t idx = 0; idx < size; idx++ )
292 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
297 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
298 struct OpenCLBinaryCacheConfigurator
300 cv::String cache_path_;
301 cv::String cache_lock_filename_;
302 cv::Ptr<utils::fs::FileLock> cache_lock_;
304 typedef std::map<std::string, std::string> ContextCacheType;
305 ContextCacheType prepared_contexts_;
306 Mutex mutex_prepared_contexts_;
308 OpenCLBinaryCacheConfigurator()
310 CV_LOG_DEBUG(NULL, "Initializing OpenCL cache configuration...");
311 if (!CV_OPENCL_CACHE_ENABLE)
313 CV_LOG_INFO(NULL, "OpenCL cache is disabled");
316 cache_path_ = utils::fs::getCacheDirectory("opencl_cache", "OPENCV_OPENCL_CACHE_DIR");
317 if (cache_path_.empty())
319 CV_LOG_INFO(NULL, "Specify OPENCV_OPENCL_CACHE_DIR configuration parameter to enable OpenCL cache");
325 if (cache_path_.empty())
327 if (cache_path_ == "disabled")
329 if (!utils::fs::createDirectories(cache_path_))
331 CV_LOG_DEBUG(NULL, "Can't use OpenCL cache directory: " << cache_path_);
336 if (CV_OPENCL_CACHE_LOCK_ENABLE)
338 cache_lock_filename_ = cache_path_ + ".lock";
339 if (!utils::fs::exists(cache_lock_filename_))
341 CV_LOG_DEBUG(NULL, "Creating lock file... (" << cache_lock_filename_ << ")");
342 std::ofstream lock_filename(cache_lock_filename_.c_str(), std::ios::out);
343 if (!lock_filename.is_open())
345 CV_LOG_WARNING(NULL, "Can't create lock file for OpenCL program cache: " << cache_lock_filename_);
352 cache_lock_ = makePtr<utils::fs::FileLock>(cache_lock_filename_.c_str());
353 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... (" << cache_lock_filename_ << ")");
355 utils::shared_lock_guard<utils::fs::FileLock> lock(*cache_lock_);
357 CV_LOG_VERBOSE(NULL, 0, "Checking cache lock... Done!");
359 catch (const cv::Exception& e)
361 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_ << std::endl << e.what());
365 CV_LOG_WARNING(NULL, "Can't create OpenCL program cache lock: " << cache_lock_filename_);
370 if (CV_OPENCL_CACHE_WRITE)
372 CV_LOG_WARNING(NULL, "OpenCL cache lock is disabled while cache write is allowed "
373 "(not safe for multiprocess environment)");
377 CV_LOG_INFO(NULL, "OpenCL cache lock is disabled");
381 catch (const cv::Exception& e)
383 CV_LOG_WARNING(NULL, "Can't prepare OpenCL program cache: " << cache_path_ << std::endl << e.what());
387 if (!cache_path_.empty())
389 if (cache_lock_.empty() && CV_OPENCL_CACHE_LOCK_ENABLE)
391 CV_LOG_WARNING(NULL, "Initialized OpenCL cache directory, but interprocess synchronization lock is not available. "
392 "Consider to disable OpenCL cache: OPENCV_OPENCL_CACHE_DIR=disabled");
396 CV_LOG_INFO(NULL, "Successfully initialized OpenCL cache directory: " << cache_path_);
404 cache_lock_filename_.clear();
405 cache_lock_.release();
408 std::string prepareCacheDirectoryForContext(const std::string& ctx_prefix,
409 const std::string& cleanup_prefix)
411 if (cache_path_.empty())
412 return std::string();
414 AutoLock lock(mutex_prepared_contexts_);
416 ContextCacheType::iterator found_it = prepared_contexts_.find(ctx_prefix);
417 if (found_it != prepared_contexts_.end())
418 return found_it->second;
420 CV_LOG_INFO(NULL, "Preparing OpenCL cache configuration for context: " << ctx_prefix);
422 std::string target_directory = cache_path_ + ctx_prefix + "/";
423 bool result = utils::fs::isDirectory(target_directory);
428 CV_LOG_VERBOSE(NULL, 0, "Creating directory: " << target_directory);
429 if (utils::fs::createDirectories(target_directory))
435 CV_LOG_WARNING(NULL, "Can't create directory: " << target_directory);
438 catch (const cv::Exception& e)
440 CV_LOG_ERROR(NULL, "Can't create OpenCL program cache directory for context: " << target_directory << std::endl << e.what());
443 target_directory = result ? target_directory : std::string();
444 prepared_contexts_.insert(std::pair<std::string, std::string>(ctx_prefix, target_directory));
446 if (result && CV_OPENCL_CACHE_CLEANUP && CV_OPENCL_CACHE_WRITE && !cleanup_prefix.empty())
450 std::vector<String> entries;
451 utils::fs::glob_relative(cache_path_, cleanup_prefix + "*", entries, false, true);
452 std::vector<String> remove_entries;
453 for (size_t i = 0; i < entries.size(); i++)
455 const String& name = entries[i];
456 if (0 == name.find(cleanup_prefix))
458 if (0 == name.find(ctx_prefix))
459 continue; // skip current
460 remove_entries.push_back(name);
463 if (!remove_entries.empty())
465 CV_LOG_WARNING(NULL, (remove_entries.size() == 1
466 ? "Detected OpenCL cache directory for other version of OpenCL device."
467 : "Detected OpenCL cache directories for other versions of OpenCL device.")
468 << " We assume that these directories are obsolete after OpenCL runtime/drivers upgrade.");
469 CV_LOG_WARNING(NULL, "Trying to remove these directories...");
470 for (size_t i = 0; i < remove_entries.size(); i++)
472 CV_LOG_WARNING(NULL, "- " << remove_entries[i]);
474 CV_LOG_WARNING(NULL, "Note: You can disable this behavior via this option: OPENCV_OPENCL_CACHE_CLEANUP=0");
476 for (size_t i = 0; i < remove_entries.size(); i++)
478 const String& name = remove_entries[i];
479 cv::String path = utils::fs::join(cache_path_, name);
482 utils::fs::remove_all(path);
483 CV_LOG_WARNING(NULL, "Removed: " << path);
485 catch (const cv::Exception& e)
487 CV_LOG_ERROR(NULL, "Exception during removal of obsolete OpenCL cache directory: " << path << std::endl << e.what());
494 CV_LOG_WARNING(NULL, "Can't check for obsolete OpenCL cache directories");
498 CV_LOG_VERBOSE(NULL, 1, " Result: " << (target_directory.empty() ? std::string("Failed") : target_directory));
499 return target_directory;
502 static OpenCLBinaryCacheConfigurator& getSingletonInstance()
504 CV_SINGLETON_LAZY_INIT_REF(OpenCLBinaryCacheConfigurator, new OpenCLBinaryCacheConfigurator());
507 class BinaryProgramFile
509 enum { MAX_ENTRIES = 64 };
511 typedef unsigned int uint32_t;
513 struct CV_DECL_ALIGNED(4) FileHeader
515 uint32_t sourceSignatureSize;
516 //char sourceSignature[];
519 struct CV_DECL_ALIGNED(4) FileTable
521 uint32_t numberOfEntries;
522 //uint32_t firstEntryOffset[];
525 struct CV_DECL_ALIGNED(4) FileEntry
527 uint32_t nextEntryFileOffset; // 0 for the last entry in chain
534 const std::string fileName_;
535 const char* const sourceSignature_;
536 const size_t sourceSignatureSize_;
540 uint32_t entryOffsets[MAX_ENTRIES];
542 uint32_t getHash(const std::string& options)
544 uint64 hash = crc64((const uchar*)options.c_str(), options.size(), 0);
545 return hash & (MAX_ENTRIES - 1);
548 inline size_t getFileSize()
550 size_t pos = (size_t)f.tellg();
551 f.seekg(0, std::fstream::end);
552 size_t fileSize = (size_t)f.tellg();
553 f.seekg(pos, std::fstream::beg);
556 inline uint32_t readUInt32()
559 f.read((char*)&res, sizeof(uint32_t));
560 CV_Assert(!f.fail());
563 inline void writeUInt32(const uint32_t value)
566 f.write((char*)&v, sizeof(uint32_t));
567 CV_Assert(!f.fail());
570 inline void seekReadAbsolute(size_t pos)
572 f.seekg(pos, std::fstream::beg);
573 CV_Assert(!f.fail());
575 inline void seekReadRelative(size_t pos)
577 f.seekg(pos, std::fstream::cur);
578 CV_Assert(!f.fail());
581 inline void seekWriteAbsolute(size_t pos)
583 f.seekp(pos, std::fstream::beg);
584 CV_Assert(!f.fail());
590 if (0 != remove(fileName_.c_str()))
591 CV_LOG_ERROR(NULL, "Can't remove: " << fileName_);
596 BinaryProgramFile(const std::string& fileName, const char* sourceSignature)
597 : fileName_(fileName), sourceSignature_(sourceSignature), sourceSignatureSize_(sourceSignature_ ? strlen(sourceSignature_) : 0)
599 CV_StaticAssert(sizeof(uint32_t) == 4, "");
600 CV_Assert(sourceSignature_ != NULL);
601 CV_Assert(sourceSignatureSize_ > 0);
602 memset(entryOffsets, 0, sizeof(entryOffsets));
604 f.rdbuf()->pubsetbuf(0, 0); // disable buffering
605 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
606 if(f.is_open() && getFileSize() > 0)
608 bool isValid = false;
611 uint32_t fileSourceSignatureSize = readUInt32();
612 if (fileSourceSignatureSize == sourceSignatureSize_)
614 cv::AutoBuffer<char> fileSourceSignature(fileSourceSignatureSize + 1);
615 f.read(fileSourceSignature.data(), fileSourceSignatureSize);
618 CV_LOG_ERROR(NULL, "Unexpected EOF");
620 else if (memcmp(sourceSignature, fileSourceSignature.data(), fileSourceSignatureSize) == 0)
627 CV_LOG_ERROR(NULL, "Source code signature/hash mismatch (program source code has been changed/updated)");
630 catch (const cv::Exception& e)
632 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : " << e.what());
636 CV_LOG_ERROR(NULL, "Can't open binary program file: " << fileName << " : Unknown error");
649 bool read(const std::string& key, std::vector<char>& buf)
654 size_t fileSize = getFileSize();
657 CV_LOG_ERROR(NULL, "Invalid file (empty): " << fileName_);
664 uint32_t fileSourceSignatureSize = readUInt32();
665 CV_Assert(fileSourceSignatureSize > 0);
666 seekReadRelative(fileSourceSignatureSize);
668 uint32_t numberOfEntries = readUInt32();
669 CV_Assert(numberOfEntries > 0);
670 if (numberOfEntries != MAX_ENTRIES)
672 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
676 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
677 CV_Assert(!f.fail());
679 uint32_t entryNum = getHash(key);
681 uint32_t entryOffset = entryOffsets[entryNum];
683 while (entryOffset > 0)
685 seekReadAbsolute(entryOffset);
686 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
687 f.read((char*)&entry, sizeof(entry));
688 CV_Assert(!f.fail());
689 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
690 if (key.size() == entry.keySize)
692 if (entry.keySize > 0)
694 f.read(fileKey.data(), entry.keySize);
695 CV_Assert(!f.fail());
697 if (memcmp(fileKey.data(), key.c_str(), entry.keySize) == 0)
699 buf.resize(entry.dataSize);
700 f.read(&buf[0], entry.dataSize);
701 CV_Assert(!f.fail());
703 CV_LOG_VERBOSE(NULL, 0, "Read...");
707 if (entry.nextEntryFileOffset == 0)
709 entryOffset = entry.nextEntryFileOffset;
714 bool write(const std::string& key, std::vector<char>& buf)
718 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
721 f.open(fileName_.c_str(), std::ios::out|std::ios::binary);
724 CV_LOG_ERROR(NULL, "Can't create file: " << fileName_);
730 size_t fileSize = getFileSize();
734 seekWriteAbsolute(0);
735 writeUInt32((uint32_t)sourceSignatureSize_);
736 f.write(sourceSignature_, sourceSignatureSize_);
737 CV_Assert(!f.fail());
739 writeUInt32(MAX_ENTRIES);
740 memset(entryOffsets, 0, sizeof(entryOffsets));
741 f.write((char*)entryOffsets, sizeof(entryOffsets));
742 CV_Assert(!f.fail());
744 CV_Assert(!f.fail());
746 f.open(fileName_.c_str(), std::ios::in|std::ios::out|std::ios::binary);
747 CV_Assert(f.is_open());
748 fileSize = getFileSize();
753 uint32_t fileSourceSignatureSize = readUInt32();
754 CV_Assert(fileSourceSignatureSize == sourceSignatureSize_);
755 seekReadRelative(fileSourceSignatureSize);
757 uint32_t numberOfEntries = readUInt32();
758 CV_Assert(numberOfEntries > 0);
759 if (numberOfEntries != MAX_ENTRIES)
761 CV_LOG_ERROR(NULL, "Invalid file: " << fileName_);
765 size_t tableEntriesOffset = (size_t)f.tellg();
766 f.read((char*)&entryOffsets[0], sizeof(entryOffsets));
767 CV_Assert(!f.fail());
769 uint32_t entryNum = getHash(key);
771 uint32_t entryOffset = entryOffsets[entryNum];
773 while (entryOffset > 0)
775 seekReadAbsolute(entryOffset);
776 //CV_StaticAssert(sizeof(entry) == sizeof(uint32_t) * 3, "");
777 f.read((char*)&entry, sizeof(entry));
778 CV_Assert(!f.fail());
779 cv::AutoBuffer<char> fileKey(entry.keySize + 1);
780 if (key.size() == entry.keySize)
782 if (entry.keySize > 0)
784 f.read(fileKey.data(), entry.keySize);
785 CV_Assert(!f.fail());
787 if (0 == memcmp(fileKey.data(), key.c_str(), entry.keySize))
790 CV_LOG_VERBOSE(NULL, 0, "Duplicate key ignored: " << fileName_);
794 if (entry.nextEntryFileOffset == 0)
796 entryOffset = entry.nextEntryFileOffset;
801 seekWriteAbsolute(entryOffset);
802 entry.nextEntryFileOffset = (uint32_t)fileSize;
803 f.write((char*)&entry, sizeof(entry));
804 CV_Assert(!f.fail());
808 entryOffsets[entryNum] = (uint32_t)fileSize;
809 seekWriteAbsolute(tableEntriesOffset);
810 f.write((char*)entryOffsets, sizeof(entryOffsets));
811 CV_Assert(!f.fail());
813 seekWriteAbsolute(fileSize);
814 entry.nextEntryFileOffset = 0;
815 entry.dataSize = (uint32_t)buf.size();
816 entry.keySize = (uint32_t)key.size();
817 f.write((char*)&entry, sizeof(entry));
818 CV_Assert(!f.fail());
819 f.write(key.c_str(), entry.keySize);
820 CV_Assert(!f.fail());
821 f.write(&buf[0], entry.dataSize);
822 CV_Assert(!f.fail());
824 CV_Assert(!f.fail());
825 CV_LOG_VERBOSE(NULL, 0, "Write... (" << buf.size() << " bytes)");
829 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
832 // true if we have initialized OpenCL subsystem with available platforms
833 static bool g_isOpenCVActivated = false;
838 static bool g_isOpenCLInitialized = false;
839 static bool g_isOpenCLAvailable = false;
841 if (!g_isOpenCLInitialized)
843 CV_TRACE_REGION("Init_OpenCL_Runtime");
844 const char* envPath = getenv("OPENCV_OPENCL_RUNTIME");
847 if (cv::String(envPath) == "disabled")
849 g_isOpenCLAvailable = false;
850 g_isOpenCLInitialized = true;
853 CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
857 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
858 g_isOpenCVActivated = n > 0;
862 g_isOpenCLAvailable = false;
864 g_isOpenCLInitialized = true;
866 return g_isOpenCLAvailable;
871 CoreTLSData& data = getCoreTlsData();
872 if (data.useOpenCL < 0)
876 data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
883 return data.useOpenCL > 0;
886 bool isOpenCLActivated()
888 if (!g_isOpenCVActivated)
889 return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls
893 void setUseOpenCL(bool flag)
897 CoreTLSData& data = getCoreTlsData();
902 else if( haveOpenCL() )
904 data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
908 #ifdef HAVE_CLAMDBLAS
913 static AmdBlasHelper & getInstance()
915 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
918 bool isAvailable() const
920 return g_isAmdBlasAvailable;
935 if (!g_isAmdBlasInitialized)
937 AutoLock lock(getInitializationMutex());
939 if (!g_isAmdBlasInitialized)
945 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
949 g_isAmdBlasAvailable = false;
953 g_isAmdBlasAvailable = false;
955 g_isAmdBlasInitialized = true;
961 static bool g_isAmdBlasInitialized;
962 static bool g_isAmdBlasAvailable;
965 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
966 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
970 return AmdBlasHelper::getInstance().isAvailable();
987 static AmdFftHelper & getInstance()
989 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
992 bool isAvailable() const
994 return g_isAmdFftAvailable;
1001 // clAmdFftTeardown();
1009 if (!g_isAmdFftInitialized)
1011 AutoLock lock(getInitializationMutex());
1013 if (!g_isAmdFftInitialized)
1019 cl_uint major, minor, patch;
1020 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
1022 // it throws exception in case AmdFft binaries are not found
1023 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
1024 g_isAmdFftAvailable = true;
1026 catch (const Exception &)
1028 g_isAmdFftAvailable = false;
1032 g_isAmdFftAvailable = false;
1034 g_isAmdFftInitialized = true;
1040 static clAmdFftSetupData setupData;
1041 static bool g_isAmdFftInitialized;
1042 static bool g_isAmdFftAvailable;
1045 clAmdFftSetupData AmdFftHelper::setupData;
1046 bool AmdFftHelper::g_isAmdFftAvailable = false;
1047 bool AmdFftHelper::g_isAmdFftInitialized = false;
1051 return AmdFftHelper::getInstance().isAvailable();
1065 #ifdef HAVE_OPENCL_SVM
1074 Queue::getDefault().finish();
1077 /////////////////////////////////////////// Platform /////////////////////////////////////////////
1079 struct Platform::Impl
1085 initialized = false;
1094 //cl_uint num_entries
1096 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
1102 CV_OCL_DBG_CHECK(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len));
1104 vendor = String(buf);
1111 IMPLEMENT_REFCOUNTABLE();
1113 cl_platform_id handle;
1118 Platform::Platform()
1123 Platform::~Platform()
1129 Platform::Platform(const Platform& pl)
1136 Platform& Platform::operator = (const Platform& pl)
1138 Impl* newp = (Impl*)pl.p;
1147 void* Platform::ptr() const
1149 return p ? p->handle : 0;
1152 Platform& Platform::getDefault()
1163 /////////////////////////////////////// Device ////////////////////////////////////////////
1165 // deviceVersion has format
1166 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
1168 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
1169 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
1170 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
1173 if (10 >= deviceVersion.length())
1175 const char *pstr = deviceVersion.c_str();
1176 if (0 != strncmp(pstr, "OpenCL ", 7))
1178 size_t ppos = deviceVersion.find('.', 7);
1179 if (String::npos == ppos)
1181 String temp = deviceVersion.substr(7, ppos - 7);
1182 major = atoi(temp.c_str());
1183 temp = deviceVersion.substr(ppos + 1);
1184 minor = atoi(temp.c_str());
1191 handle = (cl_device_id)d;
1194 name_ = getStrProp(CL_DEVICE_NAME);
1195 version_ = getStrProp(CL_DEVICE_VERSION);
1196 extensions_ = getStrProp(CL_DEVICE_EXTENSIONS);
1197 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
1198 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
1199 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
1200 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
1201 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
1202 driverVersion_ = getStrProp(CL_DRIVER_VERSION);
1203 addressBits_ = getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS);
1205 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
1206 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
1209 while (pos < extensions_.size())
1211 size_t pos2 = extensions_.find(' ', pos);
1212 if (pos2 == String::npos)
1213 pos2 = extensions_.size();
1216 std::string extensionName = extensions_.substr(pos, pos2 - pos);
1217 extensions_set_.insert(extensionName);
1222 intelSubgroupsSupport_ = isExtensionSupported("cl_intel_subgroups");
1224 vendorName_ = getStrProp(CL_DEVICE_VENDOR);
1225 if (vendorName_ == "Advanced Micro Devices, Inc." ||
1226 vendorName_ == "AMD")
1227 vendorID_ = VENDOR_AMD;
1228 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
1229 vendorID_ = VENDOR_INTEL;
1230 else if (vendorName_ == "NVIDIA Corporation")
1231 vendorID_ = VENDOR_NVIDIA;
1233 vendorID_ = UNKNOWN_VENDOR;
1235 const size_t CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE", 0);
1236 if (CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE > 0)
1238 const size_t new_maxWorkGroupSize = std::min(maxWorkGroupSize_, CV_OPENCL_DEVICE_MAX_WORK_GROUP_SIZE);
1239 if (new_maxWorkGroupSize != maxWorkGroupSize_)
1240 CV_LOG_WARNING(NULL, "OpenCL: using workgroup size: " << new_maxWorkGroupSize << " (was " << maxWorkGroupSize_ << ")");
1241 maxWorkGroupSize_ = new_maxWorkGroupSize;
1244 if (isExtensionSupported("cl_khr_spir"))
1246 #ifndef CL_DEVICE_SPIR_VERSIONS
1247 #define CL_DEVICE_SPIR_VERSIONS 0x40E0
1249 cv::String spir_versions = getStrProp(CL_DEVICE_SPIR_VERSIONS);
1250 std::cout << spir_versions << std::endl;
1255 template<typename _TpCL, typename _TpOut>
1256 _TpOut getProp(cl_device_info prop) const
1261 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1262 sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
1265 bool getBoolProp(cl_device_info prop) const
1267 cl_bool temp = CL_FALSE;
1270 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
1271 sz == sizeof(temp) ? temp != 0 : false;
1274 String getStrProp(cl_device_info prop) const
1278 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
1279 sz < sizeof(buf) ? String(buf) : String();
1282 bool isExtensionSupported(const std::string& extensionName) const
1284 return extensions_set_.count(extensionName) > 0;
1288 IMPLEMENT_REFCOUNTABLE();
1290 cl_device_id handle;
1294 std::string extensions_;
1295 int doubleFPConfig_;
1296 bool hostUnifiedMemory_;
1297 int maxComputeUnits_;
1298 size_t maxWorkGroupSize_;
1301 int deviceVersionMajor_;
1302 int deviceVersionMinor_;
1303 String driverVersion_;
1306 bool intelSubgroupsSupport_;
1308 std::set<std::string> extensions_set_;
1317 Device::Device(void* d)
1323 Device::Device(const Device& d)
1330 Device& Device::operator = (const Device& d)
1332 Impl* newp = (Impl*)d.p;
1347 void Device::set(void* d)
1354 void* Device::ptr() const
1356 return p ? p->handle : 0;
1359 String Device::name() const
1360 { return p ? p->name_ : String(); }
1362 String Device::extensions() const
1363 { return p ? String(p->extensions_) : String(); }
1365 bool Device::isExtensionSupported(const String& extensionName) const
1366 { return p ? p->isExtensionSupported(extensionName) : false; }
1368 String Device::version() const
1369 { return p ? p->version_ : String(); }
1371 String Device::vendorName() const
1372 { return p ? p->vendorName_ : String(); }
1374 int Device::vendorID() const
1375 { return p ? p->vendorID_ : 0; }
1377 String Device::OpenCL_C_Version() const
1378 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
1380 String Device::OpenCLVersion() const
1381 { return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
1383 int Device::deviceVersionMajor() const
1384 { return p ? p->deviceVersionMajor_ : 0; }
1386 int Device::deviceVersionMinor() const
1387 { return p ? p->deviceVersionMinor_ : 0; }
1389 String Device::driverVersion() const
1390 { return p ? p->driverVersion_ : String(); }
1392 int Device::type() const
1393 { return p ? p->type_ : 0; }
1395 int Device::addressBits() const
1396 { return p ? p->addressBits_ : 0; }
1398 bool Device::available() const
1399 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
1401 bool Device::compilerAvailable() const
1402 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
1404 bool Device::linkerAvailable() const
1405 #ifdef CL_VERSION_1_2
1406 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
1408 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1411 int Device::doubleFPConfig() const
1412 { return p ? p->doubleFPConfig_ : 0; }
1414 int Device::singleFPConfig() const
1415 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
1417 int Device::halfFPConfig() const
1418 #ifdef CL_VERSION_1_2
1419 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
1421 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1424 bool Device::endianLittle() const
1425 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
1427 bool Device::errorCorrectionSupport() const
1428 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
1430 int Device::executionCapabilities() const
1431 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
1433 size_t Device::globalMemCacheSize() const
1434 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
1436 int Device::globalMemCacheType() const
1437 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
1439 int Device::globalMemCacheLineSize() const
1440 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
1442 size_t Device::globalMemSize() const
1443 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
1445 size_t Device::localMemSize() const
1446 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
1448 int Device::localMemType() const
1449 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
1451 bool Device::hostUnifiedMemory() const
1452 { return p ? p->hostUnifiedMemory_ : false; }
1454 bool Device::imageSupport() const
1455 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
1457 bool Device::imageFromBufferSupport() const
1459 return p ? p->isExtensionSupported("cl_khr_image2d_from_buffer") : false;
1462 uint Device::imagePitchAlignment() const
1464 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
1465 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
1471 uint Device::imageBaseAddressAlignment() const
1473 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
1474 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
1480 size_t Device::image2DMaxWidth() const
1481 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
1483 size_t Device::image2DMaxHeight() const
1484 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
1486 size_t Device::image3DMaxWidth() const
1487 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
1489 size_t Device::image3DMaxHeight() const
1490 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
1492 size_t Device::image3DMaxDepth() const
1493 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
1495 size_t Device::imageMaxBufferSize() const
1496 #ifdef CL_VERSION_1_2
1497 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
1499 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1502 size_t Device::imageMaxArraySize() const
1503 #ifdef CL_VERSION_1_2
1504 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
1506 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1509 bool Device::intelSubgroupsSupport() const
1510 { return p ? p->intelSubgroupsSupport_ : false; }
1512 int Device::maxClockFrequency() const
1513 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
1515 int Device::maxComputeUnits() const
1516 { return p ? p->maxComputeUnits_ : 0; }
1518 int Device::maxConstantArgs() const
1519 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
1521 size_t Device::maxConstantBufferSize() const
1522 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
1524 size_t Device::maxMemAllocSize() const
1525 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
1527 size_t Device::maxParameterSize() const
1528 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
1530 int Device::maxReadImageArgs() const
1531 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
1533 int Device::maxWriteImageArgs() const
1534 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
1536 int Device::maxSamplers() const
1537 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
1539 size_t Device::maxWorkGroupSize() const
1540 { return p ? p->maxWorkGroupSize_ : 0; }
1542 int Device::maxWorkItemDims() const
1543 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
1545 void Device::maxWorkItemSizes(size_t* sizes) const
1549 const int MAX_DIMS = 32;
1551 CV_OCL_DBG_CHECK(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1552 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz));
1556 int Device::memBaseAddrAlign() const
1557 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
1559 int Device::nativeVectorWidthChar() const
1560 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
1562 int Device::nativeVectorWidthShort() const
1563 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
1565 int Device::nativeVectorWidthInt() const
1566 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
1568 int Device::nativeVectorWidthLong() const
1569 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
1571 int Device::nativeVectorWidthFloat() const
1572 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
1574 int Device::nativeVectorWidthDouble() const
1575 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
1577 int Device::nativeVectorWidthHalf() const
1578 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
1580 int Device::preferredVectorWidthChar() const
1581 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
1583 int Device::preferredVectorWidthShort() const
1584 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
1586 int Device::preferredVectorWidthInt() const
1587 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
1589 int Device::preferredVectorWidthLong() const
1590 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
1592 int Device::preferredVectorWidthFloat() const
1593 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
1595 int Device::preferredVectorWidthDouble() const
1596 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
1598 int Device::preferredVectorWidthHalf() const
1599 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
1601 size_t Device::printfBufferSize() const
1602 #ifdef CL_VERSION_1_2
1603 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
1605 { CV_REQUIRE_OPENCL_1_2_ERROR; }
1609 size_t Device::profilingTimerResolution() const
1610 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
1612 const Device& Device::getDefault()
1614 const Context& ctx = Context::getDefault();
1615 int idx = getCoreTlsData().device;
1616 const Device& device = ctx.device(idx);
1620 ////////////////////////////////////// Context ///////////////////////////////////////////////////
1622 template <typename Functor, typename ObjectType>
1623 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
1626 cl_int err = f(obj, name, 0, NULL, &required);
1627 if (err != CL_SUCCESS)
1633 AutoBuffer<char> buf(required + 1);
1634 char* ptr = buf.data(); // cleanup is not needed
1635 err = f(obj, name, required, ptr, NULL);
1636 if (err != CL_SUCCESS)
1644 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
1649 std::istringstream ss(s);
1653 std::getline(ss, item, delim);
1654 elems.push_back(item);
1658 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
1660 // Sample: AMD:GPU:Tahiti
1661 // Sample: :GPU|CPU: = '' = ':' = '::'
1662 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
1663 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
1665 std::vector<std::string> parts;
1666 split(configurationStr, ':', parts);
1667 if (parts.size() > 3)
1669 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
1672 if (parts.size() > 2)
1673 deviceNameOrID = parts[2];
1674 if (parts.size() > 1)
1676 split(parts[1], '|', deviceTypes);
1678 if (parts.size() > 0)
1680 platform = parts[0];
1685 #if defined WINRT || defined _WIN32_WCE
1686 static cl_device_id selectOpenCLDevice()
1691 // std::tolower is int->int
1692 static char char_tolower(char ch)
1694 return (char)std::tolower((int)ch);
1696 static cl_device_id selectOpenCLDevice()
1698 std::string platform, deviceName;
1699 std::vector<std::string> deviceTypes;
1701 const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
1702 if (configuration &&
1703 (strcmp(configuration, "disabled") == 0 ||
1704 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
1710 if (deviceName.length() == 1)
1711 // We limit ID range to 0..9, because we want to write:
1712 // - '2500' to mean i5-2500
1713 // - '8350' to mean AMD FX-8350
1714 // - '650' to mean GeForce 650
1715 // To extend ID range change condition to '> 0'
1718 for (size_t i = 0; i < deviceName.length(); i++)
1720 if (!isdigit(deviceName[i]))
1728 deviceID = atoi(deviceName.c_str());
1734 std::vector<cl_platform_id> platforms;
1736 cl_uint numPlatforms = 0;
1737 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
1739 if (numPlatforms == 0)
1741 platforms.resize((size_t)numPlatforms);
1742 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
1743 platforms.resize(numPlatforms);
1746 int selectedPlatform = -1;
1747 if (platform.length() > 0)
1749 for (size_t i = 0; i < platforms.size(); i++)
1752 CV_OCL_DBG_CHECK(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name));
1753 if (name.find(platform) != std::string::npos)
1755 selectedPlatform = (int)i;
1759 if (selectedPlatform == -1)
1761 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
1765 if (deviceTypes.size() == 0)
1769 deviceTypes.push_back("GPU");
1771 deviceTypes.push_back("CPU");
1774 deviceTypes.push_back("ALL");
1776 for (size_t t = 0; t < deviceTypes.size(); t++)
1779 std::string tempStrDeviceType = deviceTypes[t];
1780 std::transform(tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), char_tolower);
1782 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1783 deviceType = Device::TYPE_GPU;
1784 else if (tempStrDeviceType == "cpu")
1785 deviceType = Device::TYPE_CPU;
1786 else if (tempStrDeviceType == "accelerator")
1787 deviceType = Device::TYPE_ACCELERATOR;
1788 else if (tempStrDeviceType == "all")
1789 deviceType = Device::TYPE_ALL;
1792 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
1796 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
1797 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
1798 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
1802 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
1803 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1805 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get count");
1809 size_t base = devices.size();
1810 devices.resize(base + count);
1811 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
1812 if (!(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND))
1814 CV_OCL_DBG_CHECK_RESULT(status, "clGetDeviceIDs get IDs");
1818 for (size_t i = (isID ? deviceID : 0);
1819 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
1823 CV_OCL_DBG_CHECK(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name));
1824 cl_bool useGPU = true;
1825 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
1827 cl_bool isIGPU = CL_FALSE;
1828 CV_OCL_DBG_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL));
1829 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
1831 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
1833 // TODO check for OpenCL 1.1
1841 return NULL; // suppress messages on stderr
1843 std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << configuration << std::endl
1844 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
1845 << " Device types: ";
1846 for (size_t t = 0; t < deviceTypes.size(); t++)
1847 std::cerr << deviceTypes[t] << " ";
1849 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
1854 #ifdef HAVE_OPENCL_SVM
1857 enum AllocatorFlags { // don't use first 16 bits
1858 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
1859 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
1860 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
1861 OPENCL_SVM_BUFFER_MASK = 3 << 16,
1862 OPENCL_SVM_BUFFER_MAP = 4 << 16
1865 static bool checkForceSVMUmatUsage()
1867 static bool initialized = false;
1868 static bool force = false;
1871 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
1876 static bool checkDisableSVMUMatUsage()
1878 static bool initialized = false;
1879 static bool force = false;
1882 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
1887 static bool checkDisableSVM()
1889 static bool initialized = false;
1890 static bool force = false;
1893 force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
1898 // see SVMCapabilities
1899 static unsigned int getSVMCapabilitiesMask()
1901 static bool initialized = false;
1902 static unsigned int mask = 0;
1905 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
1906 if (envValue == NULL)
1908 return ~0U; // all bits 1
1910 mask = atoi(envValue);
1918 static size_t getProgramCountLimit()
1920 static bool initialized = false;
1921 static size_t count = 0;
1924 count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
1930 struct Context::Impl
1932 static Context::Impl* get(Context& context) { return context.p; }
1938 #ifdef HAVE_OPENCL_SVM
1939 svmInitialized = false;
1950 CV_Assert(handle == NULL);
1952 cl_device_id d = selectOpenCLDevice();
1957 cl_platform_id pl = NULL;
1958 CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
1960 cl_context_properties prop[] =
1962 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1966 // !!! in the current implementation force the number of devices to 1 !!!
1970 handle = clCreateContext(prop, nd, &d, 0, 0, &status);
1971 CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
1973 bool ok = handle != 0 && status == CL_SUCCESS;
1988 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
1989 cl_context_properties prop[] =
1991 CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
1996 int dtype = dtype0 & 15;
1997 cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0);
1998 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
2000 CV_OCL_DBG_CHECK_RESULT(status,
2001 cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str());
2007 AutoBuffer<void*> dlistbuf(nd0*2+1);
2008 cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
2009 cl_device_id* dlist_new = dlist + nd0;
2010 CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
2014 for(i = 0; i < nd0; i++)
2017 if( !d.available() || !d.compilerAvailable() )
2019 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
2021 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
2023 String name = d.name();
2024 if( nd != 0 && name != name0 )
2027 dlist_new[nd++] = dlist[i];
2033 // !!! in the current implementation force the number of devices to 1 !!!
2036 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
2037 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateContext");
2038 bool ok = handle != 0 && retval == CL_SUCCESS;
2042 for( i = 0; i < nd; i++ )
2043 devices[i].set(dlist_new[i]);
2051 CV_OCL_DBG_CHECK(clReleaseContext(handle));
2057 Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
2059 void unloadProg(Program& prog)
2061 cv::AutoLock lock(program_cache_mutex);
2062 for (CacheList::iterator i = cacheList.begin(); i != cacheList.end(); ++i)
2064 phash_t::iterator it = phash.find(*i);
2065 if (it != phash.end())
2067 if (it->second.ptr() == prog.ptr())
2077 std::string& getPrefixString()
2081 cv::AutoLock lock(program_cache_mutex);
2084 CV_Assert(!devices.empty());
2085 const Device& d = devices[0];
2086 int bits = d.addressBits();
2087 if (bits > 0 && bits != 64)
2088 prefix = cv::format("%d-bit--", bits);
2089 prefix += d.vendorName() + "--" + d.name() + "--" + d.driverVersion();
2091 for (size_t i = 0; i < prefix.size(); i++)
2094 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2104 std::string& getPrefixBase()
2106 if (prefix_base.empty())
2108 cv::AutoLock lock(program_cache_mutex);
2109 if (prefix_base.empty())
2111 const Device& d = devices[0];
2112 int bits = d.addressBits();
2113 if (bits > 0 && bits != 64)
2114 prefix_base = cv::format("%d-bit--", bits);
2115 prefix_base += d.vendorName() + "--" + d.name() + "--";
2117 for (size_t i = 0; i < prefix_base.size(); i++)
2119 char c = prefix_base[i];
2120 if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '-'))
2122 prefix_base[i] = '_';
2130 IMPLEMENT_REFCOUNTABLE();
2133 std::vector<Device> devices;
2136 std::string prefix_base;
2138 cv::Mutex program_cache_mutex;
2139 typedef std::map<std::string, Program> phash_t;
2141 typedef std::list<cv::String> CacheList;
2142 CacheList cacheList;
2144 #ifdef HAVE_OPENCL_SVM
2145 bool svmInitialized;
2148 svm::SVMCapabilities svmCapabilities;
2149 svm::SVMFunctions svmFunctions;
2153 CV_Assert(handle != NULL);
2154 const Device& device = devices[0];
2155 cl_device_svm_capabilities deviceCaps = 0;
2156 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
2157 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
2158 if (status != CL_SUCCESS)
2160 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
2163 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
2164 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
2165 svmCapabilities.value_ =
2166 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
2167 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
2168 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
2169 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
2170 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
2171 if (svmCapabilities.value_ == 0)
2173 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
2179 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
2180 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
2183 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
2184 CV_Error(Error::StsBadArg, "clSVMAlloc returned NULL");
2189 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
2190 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
2192 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
2193 CV_Error(Error::StsBadArg, "clEnqueueSVMMap FAILED");
2198 ((int*)ptr)[0] = 100;
2202 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
2205 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
2207 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
2208 CV_Error(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
2213 CV_Error(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
2218 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
2219 clSVMFree(handle, ptr);
2222 clSVMFree(handle, ptr);
2223 svmFunctions.fn_clSVMAlloc = clSVMAlloc;
2224 svmFunctions.fn_clSVMFree = clSVMFree;
2225 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
2226 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
2227 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
2228 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
2229 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
2230 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
2231 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
2235 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
2238 // Try HSA extension
2239 String extensions = device.extensions();
2240 if (extensions.find("cl_amd_svm") == String::npos)
2242 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
2245 cl_platform_id p = NULL;
2246 CV_OCL_CHECK(status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL));
2247 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
2248 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
2249 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
2250 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
2251 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
2252 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
2253 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
2254 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
2255 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
2256 CV_Assert(svmFunctions.isValid());
2260 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
2265 svmAvailable = true;
2266 svmEnabled = !svm::checkDisableSVM();
2267 svmInitialized = true;
2268 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
2271 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
2272 svmAvailable = false;
2274 svmCapabilities.value_ = 0;
2275 svmInitialized = true;
2276 svmFunctions.fn_clSVMAlloc = NULL;
2281 friend class Program;
2290 Context::Context(int dtype)
2296 bool Context::create()
2311 bool Context::create(int dtype0)
2317 p = new Impl(dtype0);
2335 Context::Context(const Context& c)
2342 Context& Context::operator = (const Context& c)
2344 Impl* newp = (Impl*)c.p;
2353 void* Context::ptr() const
2355 return p == NULL ? NULL : p->handle;
2358 size_t Context::ndevices() const
2360 return p ? p->devices.size() : 0;
2363 const Device& Context::device(size_t idx) const
2365 static Device dummy;
2366 return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
2369 Context& Context::getDefault(bool initialize)
2371 static Context* ctx = new Context();
2372 if(!ctx->p && haveOpenCL())
2375 ctx->p = new Impl();
2378 // do not create new Context right away.
2379 // First, try to retrieve existing context of the same type.
2380 // In its turn, Platform::getContext() may call Context::create()
2381 // if there is no such context.
2382 if (ctx->p->handle == NULL)
2383 ctx->p->setDefault();
2390 Program Context::getProg(const ProgramSource& prog,
2391 const String& buildopts, String& errmsg)
2393 return p ? p->getProg(prog, buildopts, errmsg) : Program();
2396 void Context::unloadProg(Program& prog)
2399 p->unloadProg(prog);
2402 #ifdef HAVE_OPENCL_SVM
2403 bool Context::useSVM() const
2405 Context::Impl* i = p;
2407 if (!i->svmInitialized)
2409 return i->svmEnabled;
2411 void Context::setUseSVM(bool enabled)
2413 Context::Impl* i = p;
2415 if (!i->svmInitialized)
2417 if (enabled && !i->svmAvailable)
2419 CV_Error(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
2421 i->svmEnabled = enabled;
2424 bool Context::useSVM() const { return false; }
2425 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
2428 #ifdef HAVE_OPENCL_SVM
2431 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
2433 Context::Impl* i = context.p;
2435 if (!i->svmInitialized)
2437 return i->svmCapabilities;
2440 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
2442 Context::Impl* i = context.p;
2444 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
2445 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
2446 return &i->svmFunctions;
2449 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
2451 if (checkForceSVMUmatUsage())
2453 if (checkDisableSVMUMatUsage())
2455 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
2457 return false; // don't use SVM by default
2460 } // namespace cv::ocl::svm
2461 #endif // HAVE_OPENCL_SVM
2464 static void get_platform_name(cl_platform_id id, String& name)
2466 // get platform name string length
2468 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz));
2470 // get platform name string
2471 AutoBuffer<char> buf(sz + 1);
2472 CV_OCL_CHECK(clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf.data(), 0));
2474 // just in case, ensure trailing zero for ASCIIZ string
2481 // Attaches OpenCL context to OpenCV
2483 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
2487 CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
2490 CV_Error(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
2492 std::vector<cl_platform_id> platforms(cnt);
2494 CV_OCL_CHECK(clGetPlatformIDs(cnt, &platforms[0], 0));
2496 bool platformAvailable = false;
2498 // check if external platformName contained in list of available platforms in OpenCV
2499 for (unsigned int i = 0; i < cnt; i++)
2501 String availablePlatformName;
2502 get_platform_name(platforms[i], availablePlatformName);
2503 // external platform is found in the list of available platforms
2504 if (platformName == availablePlatformName)
2506 platformAvailable = true;
2511 if (!platformAvailable)
2512 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2514 // check if platformID corresponds to platformName
2515 String actualPlatformName;
2516 get_platform_name((cl_platform_id)platformID, actualPlatformName);
2517 if (platformName != actualPlatformName)
2518 CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
2520 // do not initialize OpenCL context
2521 Context ctx = Context::getDefault(false);
2523 // attach supplied context to OpenCV
2524 initializeContextFromHandle(ctx, platformID, context, deviceID);
2526 CV_OCL_CHECK(clRetainContext((cl_context)context));
2528 // clear command queue, if any
2529 CoreTLSData& data = getCoreTlsData();
2530 data.oclQueue.finish();
2535 } // attachContext()
2538 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
2540 cl_context context = (cl_context)_context;
2541 cl_device_id device = (cl_device_id)_device;
2543 // cleanup old context
2544 Context::Impl * impl = ctx.p;
2547 CV_OCL_DBG_CHECK(clReleaseContext(impl->handle));
2549 impl->devices.clear();
2551 impl->handle = context;
2552 impl->devices.resize(1);
2553 impl->devices[0].set(device);
2555 Platform& p = Platform::getDefault();
2556 Platform::Impl* pImpl = p.p;
2557 pImpl->handle = (cl_platform_id)platform;
2560 /////////////////////////////////////////// Queue /////////////////////////////////////////////
2564 inline void __init()
2568 isProfilingQueue_ = false;
2571 Impl(cl_command_queue q)
2576 cl_command_queue_properties props = 0;
2577 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL));
2578 isProfilingQueue_ = !!(props & CL_QUEUE_PROFILING_ENABLE);
2581 Impl(cl_command_queue q, bool isProfilingQueue)
2585 isProfilingQueue_ = isProfilingQueue;
2588 Impl(const Context& c, const Device& d, bool withProfiling = false)
2592 const Context* pc = &c;
2593 cl_context ch = (cl_context)pc->ptr();
2596 pc = &Context::getDefault();
2597 ch = (cl_context)pc->ptr();
2599 cl_device_id dh = (cl_device_id)d.ptr();
2601 dh = (cl_device_id)pc->device(0).ptr();
2603 cl_command_queue_properties props = withProfiling ? CL_QUEUE_PROFILING_ENABLE : 0;
2604 CV_OCL_DBG_CHECK_(handle = clCreateCommandQueue(ch, dh, props, &retval), retval);
2605 isProfilingQueue_ = withProfiling;
2611 if (!cv::__termination)
2616 CV_OCL_DBG_CHECK(clFinish(handle));
2617 CV_OCL_DBG_CHECK(clReleaseCommandQueue(handle));
2623 const cv::ocl::Queue& getProfilingQueue(const cv::ocl::Queue& self)
2625 if (isProfilingQueue_)
2628 if (profiling_queue_.ptr())
2629 return profiling_queue_;
2632 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_CONTEXT, sizeof(cl_context), &ctx, NULL));
2634 cl_device_id device = 0;
2635 CV_OCL_CHECK(clGetCommandQueueInfo(handle, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device, NULL));
2637 cl_int result = CL_SUCCESS;
2638 cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
2639 cl_command_queue q = clCreateCommandQueue(ctx, device, props, &result);
2640 CV_OCL_DBG_CHECK_RESULT(result, "clCreateCommandQueue(with CL_QUEUE_PROFILING_ENABLE)");
2643 queue.p = new Impl(q, true);
2644 profiling_queue_ = queue;
2646 return profiling_queue_;
2649 IMPLEMENT_REFCOUNTABLE();
2651 cl_command_queue handle;
2652 bool isProfilingQueue_;
2653 cv::ocl::Queue profiling_queue_;
2661 Queue::Queue(const Context& c, const Device& d)
2667 Queue::Queue(const Queue& q)
2674 Queue& Queue::operator = (const Queue& q)
2676 Impl* newp = (Impl*)q.p;
2691 bool Queue::create(const Context& c, const Device& d)
2696 return p->handle != 0;
2699 void Queue::finish()
2703 CV_OCL_DBG_CHECK(clFinish(p->handle));
2707 const Queue& Queue::getProfilingQueue() const
2710 return p->getProfilingQueue(*this);
2713 void* Queue::ptr() const
2715 return p ? p->handle : 0;
2718 Queue& Queue::getDefault()
2720 Queue& q = getCoreTlsData().oclQueue;
2721 if( !q.p && haveOpenCL() )
2722 q.create(Context::getDefault());
2726 static cl_command_queue getQueue(const Queue& q)
2728 cl_command_queue qq = (cl_command_queue)q.ptr();
2730 qq = (cl_command_queue)Queue::getDefault().ptr();
2734 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
2736 KernelArg::KernelArg()
2737 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
2741 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
2742 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
2744 CV_Assert(_flags == LOCAL || _flags == CONSTANT || _m != NULL);
2747 KernelArg KernelArg::Constant(const Mat& m)
2749 CV_Assert(m.isContinuous());
2750 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
2753 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
2757 Impl(const char* kname, const Program& prog) :
2758 refcount(1), handle(NULL), isInProgress(false), nu(0)
2760 cl_program ph = (cl_program)prog.ptr();
2765 handle = clCreateKernel(ph, kname, &retval);
2766 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateKernel('%s')", kname).c_str());
2768 for( int i = 0; i < MAX_ARRS; i++ )
2770 haveTempDstUMats = false;
2771 haveTempSrcUMats = false;
2776 for( int i = 0; i < MAX_ARRS; i++ )
2779 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
2781 u[i]->flags |= UMatData::ASYNC_CLEANUP;
2782 u[i]->currAllocator->deallocate(u[i]);
2787 haveTempDstUMats = false;
2788 haveTempSrcUMats = false;
2791 void addUMat(const UMat& m, bool dst)
2793 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
2795 CV_XADD(&m.u->urefcount, 1);
2797 if(dst && m.u->tempUMat())
2798 haveTempDstUMats = true;
2799 if(m.u->originalUMatData == NULL && m.u->tempUMat())
2800 haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat)
2803 void addImage(const Image2D& image)
2805 images.push_back(image);
2808 void finit(cl_event e)
2813 isInProgress = false;
2817 bool run(int dims, size_t _globalsize[], size_t _localsize[],
2818 bool sync, int64* timeNS, const Queue& q);
2824 CV_OCL_DBG_CHECK(clReleaseKernel(handle));
2828 IMPLEMENT_REFCOUNTABLE();
2832 enum { MAX_ARRS = 16 };
2833 UMatData* u[MAX_ARRS];
2836 std::list<Image2D> images;
2837 bool haveTempDstUMats;
2838 bool haveTempSrcUMats;
2841 }} // namespace cv::ocl
2845 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2849 ((cv::ocl::Kernel::Impl*)p)->finit(e);
2851 catch (const cv::Exception& exc)
2853 CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
2855 catch (const std::exception& exc)
2857 CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
2861 CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
2867 namespace cv { namespace ocl {
2874 Kernel::Kernel(const char* kname, const Program& prog)
2877 create(kname, prog);
2880 Kernel::Kernel(const char* kname, const ProgramSource& src,
2881 const String& buildopts, String* errmsg)
2884 create(kname, src, buildopts, errmsg);
2887 Kernel::Kernel(const Kernel& k)
2894 Kernel& Kernel::operator = (const Kernel& k)
2896 Impl* newp = (Impl*)k.p;
2911 bool Kernel::create(const char* kname, const Program& prog)
2915 p = new Impl(kname, prog);
2921 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
2927 bool Kernel::create(const char* kname, const ProgramSource& src,
2928 const String& buildopts, String* errmsg)
2936 if( !errmsg ) errmsg = &tempmsg;
2937 const Program prog = Context::getDefault().getProg(src, buildopts, *errmsg);
2938 return create(kname, prog);
2941 void* Kernel::ptr() const
2943 return p ? p->handle : 0;
2946 bool Kernel::empty() const
2951 int Kernel::set(int i, const void* value, size_t sz)
2953 if (!p || !p->handle)
2960 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
2961 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str());
2962 if (retval != CL_SUCCESS)
2967 int Kernel::set(int i, const Image2D& image2D)
2969 p->addImage(image2D);
2970 cl_mem h = (cl_mem)image2D.ptr();
2971 return set(i, &h, sizeof(h));
2974 int Kernel::set(int i, const UMat& m)
2976 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m));
2979 int Kernel::set(int i, const KernelArg& arg)
2981 if( !p || !p->handle )
2985 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index",
2986 p->name.c_str(), (int)i));
2994 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
2995 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
2996 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
2997 if (ptronly && arg.m->empty())
2999 cl_mem h_null = (cl_mem)NULL;
3000 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h_null), &h_null);
3001 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)", p->name.c_str(), (int)i).c_str());
3004 cl_mem h = (cl_mem)arg.m->handle(accessFlags);
3008 CV_LOG_ERROR(NULL, cv::format("OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)",
3009 p->name.c_str(), (int)i, (int)arg.flags, arg.m));
3015 #ifdef HAVE_OPENCL_SVM
3016 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
3018 const Context& ctx = Context::getDefault();
3019 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
3020 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
3021 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
3023 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
3025 status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
3027 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArgSVMPointer('%s', arg_index=%d, ptr=%p)", p->name.c_str(), (int)i, (void*)svmDataPtr).c_str());
3032 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
3033 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cl_mem=%p)", p->name.c_str(), (int)i, (void*)h).c_str());
3040 else if( arg.m->dims <= 2 )
3043 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
3044 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+1), (int)u2d.step).c_str());
3045 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
3046 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+2), (int)u2d.offset).c_str());
3049 if( !(arg.flags & KernelArg::NO_SIZE) )
3051 int cols = u2d.cols*arg.wscale/arg.iwscale;
3052 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
3053 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)i, (int)u2d.rows).c_str());
3054 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
3055 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+1), (int)cols).c_str());
3062 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
3063 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slicestep_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.slicestep).c_str());
3064 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
3065 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, step_value=%d)", p->name.c_str(), (int)(i+2), (int)u3d.step).c_str());
3066 status = clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
3067 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, offset_value=%d)", p->name.c_str(), (int)(i+3), (int)u3d.offset).c_str());
3069 if( !(arg.flags & KernelArg::NO_SIZE) )
3071 int cols = u3d.cols*arg.wscale/arg.iwscale;
3072 status = clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.slices);
3073 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, slices_value=%d)", p->name.c_str(), (int)i, (int)u3d.slices).c_str());
3074 status = clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
3075 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, rows_value=%d)", p->name.c_str(), (int)(i+1), (int)u3d.rows).c_str());
3076 status = clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
3077 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, cols_value=%d)", p->name.c_str(), (int)(i+2), (int)cols).c_str());
3081 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
3084 status = clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
3085 CV_OCL_DBG_CHECK_RESULT(status, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, obj=%p)", p->name.c_str(), (int)i, (int)arg.sz, (void*)arg.obj).c_str());
3089 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3090 bool sync, const Queue& q)
3095 size_t globalsize[CV_MAX_DIM] = {1,1,1};
3097 CV_Assert(_globalsize != NULL);
3098 for (int i = 0; i < dims; i++)
3100 size_t val = _localsize ? _localsize[i] :
3101 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
3102 CV_Assert( val > 0 );
3103 total *= _globalsize[i];
3104 if (_globalsize[i] == 1 && !_localsize)
3106 globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
3108 CV_Assert(total > 0);
3110 return p->run(dims, globalsize, _localsize, sync, NULL, q);
3114 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
3115 bool sync, int64* timeNS, const Queue& q)
3117 CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
3119 if (!handle || isInProgress)
3122 cl_command_queue qq = getQueue(q);
3123 if (haveTempDstUMats)
3125 if (haveTempSrcUMats)
3129 cl_event asyncEvent = 0;
3130 cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
3131 NULL, globalsize, localsize, 0, 0,
3132 (sync && !timeNS) ? 0 : &asyncEvent);
3133 #if !CV_OPENCL_SHOW_RUN_KERNELS
3134 if (retval != CL_SUCCESS)
3137 cv::String msg = cv::format("clEnqueueNDRangeKernel('%s', dims=%d, globalsize=%dx%dx%d, localsize=%s) sync=%s", name.c_str(), (int)dims,
3138 globalsize[0], (dims > 1 ? globalsize[1] : 1), (dims > 2 ? globalsize[2] : 1),
3139 (localsize ? cv::format("%dx%dx%d", localsize[0], (dims > 1 ? localsize[1] : 1), (dims > 2 ? localsize[2] : 1)) : cv::String("NULL")).c_str(),
3140 sync ? "true" : "false"
3142 if (retval != CL_SUCCESS)
3144 msg = CV_OCL_API_ERROR_MSG(retval, msg.c_str());
3146 #if CV_OPENCL_TRACE_CHECK
3147 CV_OCL_TRACE_CHECK_RESULT(retval, msg.c_str());
3149 printf("%s\n", msg.c_str());
3153 if (sync || retval != CL_SUCCESS)
3155 CV_OCL_DBG_CHECK(clFinish(qq));
3158 if (retval == CL_SUCCESS)
3160 CV_OCL_DBG_CHECK(clWaitForEvents(1, &asyncEvent));
3161 cl_ulong startTime, stopTime;
3162 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
3163 CV_OCL_CHECK(clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
3164 *timeNS = (int64)(stopTime - startTime);
3176 isInProgress = true;
3177 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this));
3180 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3181 return retval == CL_SUCCESS;
3184 bool Kernel::runTask(bool sync, const Queue& q)
3186 if(!p || !p->handle || p->isInProgress)
3189 cl_command_queue qq = getQueue(q);
3190 cl_event asyncEvent = 0;
3191 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &asyncEvent);
3192 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueTask('%s') sync=%s", p->name.c_str(), sync ? "true" : "false").c_str());
3193 if (sync || retval != CL_SUCCESS)
3195 CV_OCL_DBG_CHECK(clFinish(qq));
3201 p->isInProgress = true;
3202 CV_OCL_CHECK(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p));
3205 CV_OCL_DBG_CHECK(clReleaseEvent(asyncEvent));
3206 return retval == CL_SUCCESS;
3209 int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
3211 CV_Assert(p && p->handle && !p->isInProgress);
3212 Queue q = q_.ptr() ? q_ : Queue::getDefault();
3214 q.finish(); // call clFinish() on base queue
3215 Queue profilingQueue = q.getProfilingQueue();
3217 bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
3218 return res ? timeNs : -1;
3221 size_t Kernel::workGroupSize() const
3223 if(!p || !p->handle)
3225 size_t val = 0, retsz = 0;
3226 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3227 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(val), &val, &retsz);
3228 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE)");
3229 return status == CL_SUCCESS ? val : 0;
3232 size_t Kernel::preferedWorkGroupSizeMultiple() const
3234 if(!p || !p->handle)
3236 size_t val = 0, retsz = 0;
3237 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3238 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(val), &val, &retsz);
3239 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE)");
3240 return status == CL_SUCCESS ? val : 0;
3243 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
3245 if(!p || !p->handle || !wsz)
3248 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3249 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(wsz[0])*3, wsz, &retsz);
3250 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)");
3251 return status == CL_SUCCESS;
3254 size_t Kernel::localMemSize() const
3256 if(!p || !p->handle)
3260 cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
3261 cl_int status = clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(val), &val, &retsz);
3262 CV_OCL_CHECK_RESULT(status, "clGetKernelWorkGroupInfo(CL_KERNEL_LOCAL_MEM_SIZE)");
3263 return status == CL_SUCCESS ? (size_t)val : 0;
3268 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
3270 struct ProgramSource::Impl
3272 IMPLEMENT_REFCOUNTABLE();
3275 PROGRAM_SOURCE_CODE = 0,
3281 Impl(const String& src)
3283 init(PROGRAM_SOURCE_CODE, cv::String(), cv::String());
3284 initFromSource(src, cv::String());
3286 Impl(const String& module, const String& name, const String& codeStr, const String& codeHash)
3288 init(PROGRAM_SOURCE_CODE, module, name);
3289 initFromSource(codeStr, codeHash);
3293 void init(enum KIND kind, const String& module, const String& name)
3302 isHashUpdated = false;
3305 void initFromSource(const String& codeStr, const String& codeHash)
3308 sourceHash_ = codeHash;
3309 if (sourceHash_.empty())
3315 isHashUpdated = true;
3319 void updateHash(const char* hashStr = NULL)
3323 sourceHash_ = cv::String(hashStr);
3324 isHashUpdated = true;
3330 case PROGRAM_SOURCE_CODE:
3333 CV_Assert(codeStr_.empty());
3334 hash = crc64(sourceAddr_, sourceSize_); // static storage
3338 CV_Assert(!codeStr_.empty());
3339 hash = crc64((uchar*)codeStr_.c_str(), codeStr_.size());
3342 case PROGRAM_BINARIES:
3345 hash = crc64(sourceAddr_, sourceSize_);
3348 CV_Error(Error::StsInternal, "Internal error");
3350 sourceHash_ = cv::format("%08llx", hash);
3351 isHashUpdated = true;
3354 Impl(enum KIND kind,
3355 const String& module, const String& name,
3356 const unsigned char* binary, const size_t size,
3357 const cv::String& buildOptions = cv::String())
3359 init(kind, module, name);
3361 sourceAddr_ = binary;
3364 buildOptions_ = buildOptions;
3367 static ProgramSource fromSourceWithStaticLifetime(const String& module, const String& name,
3368 const char* sourceCodeStaticStr, const char* hashStaticStr,
3369 const cv::String& buildOptions)
3371 ProgramSource result;
3372 result.p = new Impl(PROGRAM_SOURCE_CODE, module, name,
3373 (const unsigned char*)sourceCodeStaticStr, strlen(sourceCodeStaticStr), buildOptions);
3374 result.p->updateHash(hashStaticStr);
3378 static ProgramSource fromBinary(const String& module, const String& name,
3379 const unsigned char* binary, const size_t size,
3380 const cv::String& buildOptions)
3382 ProgramSource result;
3383 result.p = new Impl(PROGRAM_BINARIES, module, name, binary, size, buildOptions);
3387 static ProgramSource fromSPIR(const String& module, const String& name,
3388 const unsigned char* binary, const size_t size,
3389 const cv::String& buildOptions)
3391 ProgramSource result;
3392 result.p = new Impl(PROGRAM_SPIR, module, name, binary, size, buildOptions);
3399 // TODO std::vector<ProgramSource> includes_;
3400 String codeStr_; // PROGRAM_SOURCE_CODE only
3402 const unsigned char* sourceAddr_;
3405 cv::String buildOptions_;
3410 friend struct Program::Impl;
3411 friend struct internal::ProgramEntry;
3412 friend struct Context::Impl;
3416 ProgramSource::ProgramSource()
3421 ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash)
3423 p = new Impl(module, name, codeStr, codeHash);
3426 ProgramSource::ProgramSource(const char* prog)
3431 ProgramSource::ProgramSource(const String& prog)
3436 ProgramSource::~ProgramSource()
3442 ProgramSource::ProgramSource(const ProgramSource& prog)
3449 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
3451 Impl* newp = (Impl*)prog.p;
3460 const String& ProgramSource::source() const
3463 CV_Assert(p->kind_ == Impl::PROGRAM_SOURCE_CODE);
3464 CV_Assert(p->sourceAddr_ == NULL); // method returns reference - can't construct temporary object
3468 ProgramSource::hash_t ProgramSource::hash() const
3470 CV_Error(Error::StsNotImplemented, "Removed method: ProgramSource::hash()");
3473 ProgramSource ProgramSource::fromBinary(const String& module, const String& name,
3474 const unsigned char* binary, const size_t size,
3475 const cv::String& buildOptions)
3478 CV_Assert(size > 0);
3479 return Impl::fromBinary(module, name, binary, size, buildOptions);
3482 ProgramSource ProgramSource::fromSPIR(const String& module, const String& name,
3483 const unsigned char* binary, const size_t size,
3484 const cv::String& buildOptions)
3487 CV_Assert(size > 0);
3488 return Impl::fromBinary(module, name, binary, size, buildOptions);
3492 internal::ProgramEntry::operator ProgramSource&() const
3494 if (this->pProgramSource == NULL)
3496 cv::AutoLock lock(cv::getInitializationMutex());
3497 if (this->pProgramSource == NULL)
3499 ProgramSource ps = ProgramSource::Impl::fromSourceWithStaticLifetime(this->module, this->name, this->programCode, this->programHash, cv::String());
3500 ProgramSource* ptr = new ProgramSource(ps);
3501 const_cast<ProgramEntry*>(this)->pProgramSource = ptr;
3504 return *this->pProgramSource;
3509 /////////////////////////////////////////// Program /////////////////////////////////////////////
3512 cv::String joinBuildOptions(const cv::String& a, const cv::String& b)
3520 return a + (cv::String(" ") + b);
3523 struct Program::Impl
3525 IMPLEMENT_REFCOUNTABLE();
3527 Impl(const ProgramSource& src,
3528 const String& _buildflags, String& errmsg) :
3531 buildflags(_buildflags)
3533 const ProgramSource::Impl* src_ = src.getImpl();
3535 sourceModule_ = src_->module_;
3536 sourceName_ = src_->name_;
3537 const Context ctx = Context::getDefault();
3538 Device device = ctx.device(0);
3539 if (ctx.ptr() == NULL || device.ptr() == NULL)
3541 buildflags = joinBuildOptions(buildflags, src_->buildOptions_);
3542 if (src.getImpl()->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3545 buildflags = joinBuildOptions(buildflags, " -D AMD_DEVICE");
3546 else if (device.isIntel())
3547 buildflags = joinBuildOptions(buildflags, " -D INTEL_DEVICE");
3548 const String param_buildExtraOptions = getBuildExtraOptions();
3549 if (!param_buildExtraOptions.empty())
3550 buildflags = joinBuildOptions(buildflags, param_buildExtraOptions);
3552 compile(ctx, src_, errmsg);
3555 bool compile(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3557 CV_Assert(ctx.getImpl());
3560 // We don't cache OpenCL binaries
3561 if (src_->kind_ == ProgramSource::Impl::PROGRAM_BINARIES)
3563 CV_LOG_VERBOSE(NULL, 0, "Load program binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3564 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3567 return compileWithCache(ctx, src_, errmsg);
3570 bool compileWithCache(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3572 CV_Assert(ctx.getImpl());
3574 CV_Assert(src_->kind_ != ProgramSource::Impl::PROGRAM_BINARIES);
3576 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3577 OpenCLBinaryCacheConfigurator& config = OpenCLBinaryCacheConfigurator::getSingletonInstance();
3578 const std::string base_dir = config.prepareCacheDirectoryForContext(
3579 ctx.getImpl()->getPrefixString(),
3580 ctx.getImpl()->getPrefixBase()
3582 const String& hash_str = src_->sourceHash_;
3584 if (!base_dir.empty() && !src_->module_.empty() && !src_->name_.empty())
3586 CV_Assert(!hash_str.empty());
3587 fname = src_->module_ + "--" + src_->name_ + "_" + hash_str + ".bin";
3588 fname = utils::fs::join(base_dir, fname);
3590 const cv::Ptr<utils::fs::FileLock> fileLock = config.cache_lock_; // can be empty
3591 if (!fname.empty() && CV_OPENCL_CACHE_ENABLE)
3595 std::vector<char> binaryBuf;
3598 cv::utils::optional_shared_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3599 BinaryProgramFile file(fname, hash_str.c_str());
3600 res = file.read(buildflags, binaryBuf);
3604 CV_Assert(!binaryBuf.empty());
3605 CV_LOG_VERBOSE(NULL, 0, "Load program binary from cache: " << src_->module_.c_str() << "/" << src_->name_.c_str());
3606 bool isLoaded = createFromBinary(ctx, binaryBuf, errmsg);
3611 catch (const cv::Exception& e)
3614 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname << std::endl << e.what());
3618 CV_LOG_VERBOSE(NULL, 0, "Can't load OpenCL binary: " + fname);
3621 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3622 CV_Assert(handle == NULL);
3623 if (src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE)
3625 if (!buildFromSources(ctx, src_, errmsg))
3630 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIR)
3632 buildflags = joinBuildOptions(buildflags, " -x spir");
3633 if ((cv::String(" ") + buildflags).find(" -spir-std=") == cv::String::npos)
3635 buildflags = joinBuildOptions(buildflags, " -spir-std=1.2");
3637 CV_LOG_VERBOSE(NULL, 0, "Load program SPIR binary... " << src_->module_.c_str() << "/" << src_->name_.c_str());
3638 bool isLoaded = createFromBinary(ctx, src_->sourceAddr_, src_->sourceSize_, errmsg);
3642 else if (src_->kind_ == ProgramSource::Impl::PROGRAM_SPIRV)
3644 CV_Error(Error::StsNotImplemented, "OpenCL: SPIR-V is not supported");
3648 CV_Error(Error::StsInternal, "Internal error");
3650 CV_Assert(handle != NULL);
3651 #if OPENCV_HAVE_FILESYSTEM_SUPPORT
3652 if (!fname.empty() && CV_OPENCL_CACHE_WRITE)
3656 std::vector<char> binaryBuf;
3657 getProgramBinary(binaryBuf);
3659 cv::utils::optional_lock_guard<cv::utils::fs::FileLock> lock_fs(fileLock.get());
3660 BinaryProgramFile file(fname, hash_str.c_str());
3661 file.write(buildflags, binaryBuf);
3664 catch (const cv::Exception& e)
3666 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname << std::endl << e.what());
3670 CV_LOG_WARNING(NULL, "Can't save OpenCL binary into cache: " + fname);
3673 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
3674 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3675 if (CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3677 std::vector<char> binaryBuf;
3678 getProgramBinary(binaryBuf);
3679 if (!binaryBuf.empty())
3681 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3683 createFromBinary(ctx, binaryBuf, errmsg);
3687 return handle != NULL;
3690 void dumpBuildLog_(cl_int result, const cl_device_id* deviceList, String& errmsg)
3692 AutoBuffer<char, 4096> buffer; buffer[0] = 0;
3695 cl_int log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3696 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
3697 if (log_retval == CL_SUCCESS && retsz > 1)
3699 buffer.resize(retsz + 16);
3700 log_retval = clGetProgramBuildInfo(handle, deviceList[0],
3701 CL_PROGRAM_BUILD_LOG, retsz+1, buffer.data(), &retsz);
3702 if (log_retval == CL_SUCCESS)
3704 if (retsz < buffer.size())
3707 buffer[buffer.size() - 1] = 0;
3715 errmsg = String(buffer.data());
3716 printf("OpenCL program build log: %s/%s\nStatus %d: %s\n%s\n%s\n",
3717 sourceModule_.c_str(), sourceName_.c_str(),
3718 result, getOpenCLErrorString(result),
3719 buildflags.c_str(), errmsg.c_str());
3723 bool buildFromSources(const Context& ctx, const ProgramSource::Impl* src_, String& errmsg)
3726 CV_Assert(src_->kind_ == ProgramSource::Impl::PROGRAM_SOURCE_CODE);
3727 CV_Assert(handle == NULL);
3728 CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Build OpenCL program: %s/%s %s options: %s",
3729 sourceModule_.c_str(), sourceName_.c_str(),
3730 src_->sourceHash_.c_str(), buildflags.c_str()).c_str());
3732 CV_LOG_VERBOSE(NULL, 0, "Compile... " << sourceModule_.c_str() << "/" << sourceName_.c_str());
3734 const char* srcptr = src_->sourceAddr_ ? ((const char*)src_->sourceAddr_) : src_->codeStr_.c_str();
3735 size_t srclen = src_->sourceAddr_ ? src_->sourceSize_ : src_->codeStr_.size();
3736 CV_Assert(srcptr != NULL);
3737 CV_Assert(srclen > 0);
3741 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
3742 CV_OCL_DBG_CHECK_RESULT(retval, "clCreateProgramWithSource");
3743 CV_Assert(handle || retval != CL_SUCCESS);
3744 if (handle && retval == CL_SUCCESS)
3746 size_t n = ctx.ndevices();
3747 AutoBuffer<cl_device_id, 4> deviceListBuf(n + 1);
3748 cl_device_id* deviceList = deviceListBuf.data();
3749 for (size_t i = 0; i < n; i++)
3751 deviceList[i] = (cl_device_id)(ctx.device(i).ptr());
3754 retval = clBuildProgram(handle, (cl_uint)n, deviceList, buildflags.c_str(), 0, 0);
3755 CV_OCL_TRACE_CHECK_RESULT(/*don't throw: retval*/CL_SUCCESS, cv::format("clBuildProgram(source: %s)", buildflags.c_str()).c_str());
3756 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
3757 if (retval != CL_SUCCESS)
3760 dumpBuildLog_(retval, deviceList, errmsg);
3762 // don't remove "retval != CL_SUCCESS" condition here:
3763 // it would break CV_OPENCL_ALWAYS_SHOW_BUILD_LOG mode
3764 if (retval != CL_SUCCESS && handle)
3766 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3770 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3771 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3773 CV_LOG_INFO(NULL, "OpenCL: query kernel names (build from sources)...");
3775 char kernels_buffer[4096] = {0};
3776 cl_int result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3777 if (retsz < sizeof(kernels_buffer))
3778 kernels_buffer[retsz] = 0;
3780 kernels_buffer[0] = 0;
3781 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3786 return handle != NULL;
3789 void getProgramBinary(std::vector<char>& buf)
3793 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL));
3795 uchar* ptr = (uchar*)&buf[0];
3796 CV_OCL_CHECK(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(ptr), &ptr, NULL));
3799 bool createFromBinary(const Context& ctx, const std::vector<char>& buf, String& errmsg)
3801 return createFromBinary(ctx, (const unsigned char*)&buf[0], buf.size(), errmsg);
3804 bool createFromBinary(const Context& ctx, const unsigned char* binaryAddr, const size_t binarySize, String& errmsg)
3806 CV_Assert(handle == NULL);
3807 CV_INSTRUMENT_REGION_OPENCL_COMPILE("Load OpenCL program");
3808 CV_LOG_VERBOSE(NULL, 0, "Load from binary... (" << binarySize << " bytes)");
3810 CV_Assert(binarySize > 0);
3812 size_t ndevices = (int)ctx.ndevices();
3813 AutoBuffer<cl_device_id> devices_(ndevices);
3814 AutoBuffer<const uchar*> binaryPtrs_(ndevices);
3815 AutoBuffer<size_t> binarySizes_(ndevices);
3817 cl_device_id* devices = devices_.data();
3818 const uchar** binaryPtrs = binaryPtrs_.data();
3819 size_t* binarySizes = binarySizes_.data();
3820 for (size_t i = 0; i < ndevices; i++)
3822 devices[i] = (cl_device_id)ctx.device(i).ptr();
3823 binaryPtrs[i] = binaryAddr;
3824 binarySizes[i] = binarySize;
3828 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), (cl_uint)ndevices, devices_.data(),
3829 binarySizes, binaryPtrs, NULL, &result);
3830 if (result != CL_SUCCESS)
3832 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clCreateProgramWithBinary"));
3835 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3843 // call clBuildProgram()
3845 result = clBuildProgram(handle, (cl_uint)ndevices, devices_.data(), buildflags.c_str(), 0, 0);
3846 CV_OCL_DBG_CHECK_RESULT(result, cv::format("clBuildProgram(binary: %s/%s)", sourceModule_.c_str(), sourceName_.c_str()).c_str());
3847 if (result != CL_SUCCESS)
3849 dumpBuildLog_(result, devices, errmsg);
3852 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3858 // check build status
3860 cl_build_status build_status = CL_BUILD_NONE;
3862 CV_OCL_DBG_CHECK(result = clGetProgramBuildInfo(handle, devices[0], CL_PROGRAM_BUILD_STATUS,
3863 sizeof(build_status), &build_status, &retsz));
3864 if (result == CL_SUCCESS)
3866 if (build_status == CL_BUILD_SUCCESS)
3872 CV_LOG_WARNING(NULL, "clGetProgramBuildInfo() returns " << build_status);
3878 CV_LOG_ERROR(NULL, CV_OCL_API_ERROR_MSG(result, "clGetProgramBuildInfo()"));
3881 CV_OCL_DBG_CHECK(clReleaseProgram(handle));
3886 #if CV_OPENCL_VALIDATE_BINARY_PROGRAMS
3887 if (handle && CV_OPENCL_VALIDATE_BINARY_PROGRAMS_VALUE)
3889 CV_LOG_INFO(NULL, "OpenCL: query kernel names (binary)...");
3891 char kernels_buffer[4096] = {0};
3892 result = clGetProgramInfo(handle, CL_PROGRAM_KERNEL_NAMES, sizeof(kernels_buffer), &kernels_buffer[0], &retsz);
3893 if (retsz < sizeof(kernels_buffer))
3894 kernels_buffer[retsz] = 0;
3896 kernels_buffer[0] = 0;
3897 CV_LOG_INFO(NULL, result << ": Kernels='" << kernels_buffer << "'");
3900 return handle != NULL;
3908 if (!cv::__termination)
3911 clReleaseProgram(handle);
3920 String sourceModule_;
3925 Program::Program() { p = 0; }
3927 Program::Program(const ProgramSource& src,
3928 const String& buildflags, String& errmsg)
3931 create(src, buildflags, errmsg);
3934 Program::Program(const Program& prog)
3941 Program& Program::operator = (const Program& prog)
3943 Impl* newp = (Impl*)prog.p;
3958 bool Program::create(const ProgramSource& src,
3959 const String& buildflags, String& errmsg)
3966 p = new Impl(src, buildflags, errmsg);
3975 void* Program::ptr() const
3977 return p ? p->handle : 0;
3980 #ifndef OPENCV_REMOVE_DEPRECATED_API
3981 const ProgramSource& Program::source() const
3983 CV_Error(Error::StsNotImplemented, "Removed API");
3986 bool Program::read(const String& bin, const String& buildflags)
3988 CV_UNUSED(bin); CV_UNUSED(buildflags);
3989 CV_Error(Error::StsNotImplemented, "Removed API");
3992 bool Program::write(String& bin) const
3995 CV_Error(Error::StsNotImplemented, "Removed API");
3998 String Program::getPrefix() const
4002 Context::Impl* ctx_ = Context::getDefault().getImpl();
4004 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), p->buildflags.c_str());
4007 String Program::getPrefix(const String& buildflags)
4009 Context::Impl* ctx_ = Context::getDefault().getImpl();
4011 return cv::format("opencl=%s\nbuildflags=%s", ctx_->getPrefixString().c_str(), buildflags.c_str());
4013 #endif // OPENCV_REMOVE_DEPRECATED_API
4015 void Program::getBinary(std::vector<char>& binary) const
4017 CV_Assert(p && "Empty program");
4018 p->getProgramBinary(binary);
4021 Program Context::Impl::getProg(const ProgramSource& src,
4022 const String& buildflags, String& errmsg)
4024 size_t limit = getProgramCountLimit();
4025 const ProgramSource::Impl* src_ = src.getImpl();
4027 String key = cv::format("module=%s name=%s codehash=%s\nopencl=%s\nbuildflags=%s",
4028 src_->module_.c_str(), src_->name_.c_str(), src_->sourceHash_.c_str(),
4029 getPrefixString().c_str(),
4030 buildflags.c_str());
4032 cv::AutoLock lock(program_cache_mutex);
4033 phash_t::iterator it = phash.find(key);
4034 if (it != phash.end())
4037 CacheList::iterator i = std::find(cacheList.begin(), cacheList.end(), key);
4038 if (i != cacheList.end() && i != cacheList.begin())
4041 cacheList.push_front(key);
4045 { // cleanup program cache
4046 size_t sz = phash.size();
4047 if (limit > 0 && sz >= limit)
4049 static bool warningFlag = false;
4052 printf("\nWARNING: OpenCV-OpenCL:\n"
4053 " In-memory cache for OpenCL programs is full, older programs will be unloaded.\n"
4054 " You can change cache size via OPENCV_OPENCL_PROGRAM_CACHE environment variable\n\n");
4057 while (!cacheList.empty())
4059 size_t c = phash.erase(cacheList.back());
4060 cacheList.pop_back();
4067 Program prog(src, buildflags, errmsg);
4068 // Cache result of build failures too (to prevent unnecessary compiler invocations)
4070 cv::AutoLock lock(program_cache_mutex);
4071 phash.insert(std::pair<std::string, Program>(key, prog));
4072 cacheList.push_front(key);
4078 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
4080 template<typename T>
4081 class OpenCLBufferPool
4084 ~OpenCLBufferPool() { }
4086 virtual T allocate(size_t size) = 0;
4087 virtual void release(T buffer) = 0;
4090 template <typename Derived, typename BufferEntry, typename T>
4091 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
4094 inline Derived& derived() { return *static_cast<Derived*>(this); }
4098 size_t currentReservedSize;
4099 size_t maxReservedSize;
4101 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
4102 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
4105 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
4107 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
4108 for (; i != allocatedEntries_.end(); ++i)
4110 BufferEntry& e = *i;
4111 if (e.clBuffer_ == buffer)
4114 allocatedEntries_.erase(i);
4122 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
4124 if (reservedEntries_.empty())
4126 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4127 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
4129 size_t minDiff = (size_t)(-1);
4130 for (; i != reservedEntries_.end(); ++i)
4132 BufferEntry& e = *i;
4133 if (e.capacity_ >= size)
4135 size_t diff = e.capacity_ - size;
4136 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
4146 if (result_pos != reservedEntries_.end())
4148 //CV_DbgAssert(result == *result_pos);
4149 reservedEntries_.erase(result_pos);
4151 currentReservedSize -= entry.capacity_;
4152 allocatedEntries_.push_back(entry);
4159 void _checkSizeOfReservedEntries()
4161 while (currentReservedSize > maxReservedSize)
4163 CV_DbgAssert(!reservedEntries_.empty());
4164 const BufferEntry& entry = reservedEntries_.back();
4165 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4166 currentReservedSize -= entry.capacity_;
4167 derived()._releaseBufferEntry(entry);
4168 reservedEntries_.pop_back();
4172 inline size_t _allocationGranularity(size_t size)
4175 if (size < 1024*1024)
4176 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
4177 else if (size < 16*1024*1024)
4184 OpenCLBufferPoolBaseImpl()
4185 : currentReservedSize(0),
4190 virtual ~OpenCLBufferPoolBaseImpl()
4192 freeAllReservedBuffers();
4193 CV_Assert(reservedEntries_.empty());
4196 virtual T allocate(size_t size) CV_OVERRIDE
4198 AutoLock locker(mutex_);
4200 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
4202 CV_DbgAssert(size <= entry.capacity_);
4203 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
4207 derived()._allocateBufferEntry(entry, size);
4209 return entry.clBuffer_;
4211 virtual void release(T buffer) CV_OVERRIDE
4213 AutoLock locker(mutex_);
4215 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
4216 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
4218 derived()._releaseBufferEntry(entry);
4222 reservedEntries_.push_front(entry);
4223 currentReservedSize += entry.capacity_;
4224 _checkSizeOfReservedEntries();
4228 virtual size_t getReservedSize() const CV_OVERRIDE { return currentReservedSize; }
4229 virtual size_t getMaxReservedSize() const CV_OVERRIDE { return maxReservedSize; }
4230 virtual void setMaxReservedSize(size_t size) CV_OVERRIDE
4232 AutoLock locker(mutex_);
4233 size_t oldMaxReservedSize = maxReservedSize;
4234 maxReservedSize = size;
4235 if (maxReservedSize < oldMaxReservedSize)
4237 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
4238 for (; i != reservedEntries_.end();)
4240 const BufferEntry& entry = *i;
4241 if (entry.capacity_ > maxReservedSize / 8)
4243 CV_DbgAssert(currentReservedSize >= entry.capacity_);
4244 currentReservedSize -= entry.capacity_;
4245 derived()._releaseBufferEntry(entry);
4246 i = reservedEntries_.erase(i);
4251 _checkSizeOfReservedEntries();
4254 virtual void freeAllReservedBuffers() CV_OVERRIDE
4256 AutoLock locker(mutex_);
4257 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
4258 for (; i != reservedEntries_.end(); ++i)
4260 const BufferEntry& entry = *i;
4261 derived()._releaseBufferEntry(entry);
4263 reservedEntries_.clear();
4264 currentReservedSize = 0;
4268 struct CLBufferEntry
4272 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
4275 class OpenCLBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
4278 typedef struct CLBufferEntry BufferEntry;
4282 OpenCLBufferPoolImpl(int createFlags = 0)
4283 : createFlags_(createFlags)
4287 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4289 CV_DbgAssert(entry.clBuffer_ == NULL);
4290 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4291 Context& ctx = Context::getDefault();
4292 cl_int retval = CL_SUCCESS;
4293 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
4294 CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
4295 CV_Assert(entry.clBuffer_ != NULL);
4296 if(retval == CL_SUCCESS)
4298 CV_IMPL_ADD(CV_IMPL_OCL);
4300 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
4301 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4302 allocatedEntries_.push_back(entry);
4305 void _releaseBufferEntry(const BufferEntry& entry)
4307 CV_Assert(entry.capacity_ != 0);
4308 CV_Assert(entry.clBuffer_ != NULL);
4309 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
4310 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4311 CV_OCL_DBG_CHECK(clReleaseMemObject(entry.clBuffer_));
4315 #ifdef HAVE_OPENCL_SVM
4316 struct CLSVMBufferEntry
4320 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
4322 class OpenCLSVMBufferPoolImpl CV_FINAL : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
4325 typedef struct CLSVMBufferEntry BufferEntry;
4327 OpenCLSVMBufferPoolImpl()
4331 void _allocateBufferEntry(BufferEntry& entry, size_t size)
4333 CV_DbgAssert(entry.clBuffer_ == NULL);
4334 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
4336 Context& ctx = Context::getDefault();
4337 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4338 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4339 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
4340 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4342 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4343 CV_DbgAssert(svmFns->isValid());
4345 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
4346 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
4349 entry.clBuffer_ = buf;
4351 CV_IMPL_ADD(CV_IMPL_OCL);
4353 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
4354 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
4355 allocatedEntries_.push_back(entry);
4358 void _releaseBufferEntry(const BufferEntry& entry)
4360 CV_Assert(entry.capacity_ != 0);
4361 CV_Assert(entry.clBuffer_ != NULL);
4362 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
4363 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
4364 Context& ctx = Context::getDefault();
4365 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4366 CV_DbgAssert(svmFns->isValid());
4367 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
4368 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
4375 template <bool readAccess, bool writeAccess>
4376 class AlignedDataPtr
4380 uchar* const originPtr_;
4381 const size_t alignment_;
4383 uchar* allocatedPtr_;
4386 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
4387 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
4389 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4390 CV_DbgAssert(!readAccess || ptr);
4391 if (((size_t)ptr_ & (alignment - 1)) != 0)
4393 allocatedPtr_ = new uchar[size_ + alignment - 1];
4394 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4397 memcpy(ptr_, originPtr_, size_);
4402 uchar* getAlignedPtr() const
4404 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4414 memcpy(originPtr_, ptr_, size_);
4416 delete[] allocatedPtr_;
4417 allocatedPtr_ = NULL;
4422 AlignedDataPtr(const AlignedDataPtr&); // disabled
4423 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
4426 template <bool readAccess, bool writeAccess>
4427 class AlignedDataPtr2D
4431 uchar* const originPtr_;
4432 const size_t alignment_;
4434 uchar* allocatedPtr_;
4440 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
4441 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
4443 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4444 CV_DbgAssert(!readAccess || ptr != NULL);
4445 if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
4447 allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
4448 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
4451 for (size_t i = 0; i < rows_; i++)
4452 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
4457 uchar* getAlignedPtr() const
4459 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
4469 for (size_t i = 0; i < rows_; i++)
4470 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
4472 delete[] allocatedPtr_;
4473 allocatedPtr_ = NULL;
4478 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
4479 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
4482 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
4483 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
4486 class OpenCLAllocator CV_FINAL : public MatAllocator
4488 mutable OpenCLBufferPoolImpl bufferPool;
4489 mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
4490 #ifdef HAVE_OPENCL_SVM
4491 mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
4497 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
4498 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1,
4499 #ifdef HAVE_OPENCL_SVM
4500 ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2,
4502 ALLOCATOR_FLAGS_EXTERNAL_BUFFER = 1 << 3 // convertFromBuffer()
4507 bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
4509 size_t defaultPoolSize, poolSize;
4510 defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
4511 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
4512 bufferPool.setMaxReservedSize(poolSize);
4513 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
4514 bufferPoolHostPtr.setMaxReservedSize(poolSize);
4515 #ifdef HAVE_OPENCL_SVM
4516 poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
4517 bufferPoolSVM.setMaxReservedSize(poolSize);
4520 matStdAllocator = Mat::getDefaultAllocator();
4524 flushCleanupQueue();
4527 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
4528 int flags, UMatUsageFlags usageFlags) const
4530 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
4534 static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
4536 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
4539 static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
4541 static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
4545 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
4547 const Device& dev = ctx.device(0);
4549 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
4550 createFlags |= CL_MEM_ALLOC_HOST_PTR;
4552 if (!isOpenCLCopyingForced() &&
4553 (isOpenCLMapForced() ||
4554 (dev.hostUnifiedMemory()
4563 flags0 = UMatData::COPY_ON_MAP;
4566 UMatData* allocate(int dims, const int* sizes, int type,
4567 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4570 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4571 CV_Assert(data == 0);
4572 size_t total = CV_ELEM_SIZE(type);
4573 for( int i = dims-1; i >= 0; i-- )
4580 Context& ctx = Context::getDefault();
4581 flushCleanupQueue();
4583 int createFlags = 0, flags0 = 0;
4584 getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
4586 void* handle = NULL;
4587 int allocatorFlags = 0;
4589 #ifdef HAVE_OPENCL_SVM
4590 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4591 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
4593 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
4594 handle = bufferPoolSVM.allocate(total);
4596 // this property is constant, so single buffer pool can be used here
4597 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4598 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4602 if (createFlags == 0)
4604 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
4605 handle = bufferPool.allocate(total);
4607 else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
4609 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
4610 handle = bufferPoolHostPtr.allocate(total);
4614 CV_Assert(handle != NULL); // Unsupported, throw
4618 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
4620 UMatData* u = new UMatData(this);
4625 u->allocatorFlags_ = allocatorFlags;
4626 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
4627 u->markHostCopyObsolete(true);
4628 opencl_allocator_stats.onAllocate(u->size);
4632 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const CV_OVERRIDE
4637 flushCleanupQueue();
4639 UMatDataAutoLock lock(u);
4643 CV_Assert(u->origdata != 0);
4644 Context& ctx = Context::getDefault();
4645 int createFlags = 0, flags0 = 0;
4646 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
4648 bool copyOnMap = (flags0 & UMatData::COPY_ON_MAP) != 0;
4650 cl_context ctx_handle = (cl_context)ctx.ptr();
4651 int allocatorFlags = 0;
4652 int tempUMatFlags = 0;
4653 void* handle = NULL;
4654 cl_int retval = CL_SUCCESS;
4656 #ifdef HAVE_OPENCL_SVM
4657 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
4658 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
4659 if (useSVM && svmCaps.isSupportFineGrainSystem())
4661 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
4662 tempUMatFlags = UMatData::TEMP_UMAT;
4663 handle = u->origdata;
4664 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
4666 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
4668 if (!(accessFlags & ACCESS_FAST)) // memcpy used
4670 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
4672 cl_svm_mem_flags memFlags = createFlags |
4673 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
4675 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4676 CV_DbgAssert(svmFns->isValid());
4678 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
4679 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
4682 cl_command_queue q = NULL;
4683 if (!isFineGrainBuffer)
4685 q = (cl_command_queue)Queue::getDefault().ptr();
4686 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
4687 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
4690 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4693 memcpy(handle, u->origdata, u->size);
4694 if (!isFineGrainBuffer)
4696 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
4697 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
4698 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4701 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
4702 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
4703 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
4710 accessFlags &= ~ACCESS_FAST;
4712 tempUMatFlags = UMatData::TEMP_UMAT;
4717 CV_OPENCL_ENABLE_MEM_USE_HOST_PTR
4718 // There are OpenCL runtime issues for less aligned data
4719 && (CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR != 0
4720 && u->origdata == cv::alignPtr(u->origdata, (int)CV_OPENCL_ALIGNMENT_MEM_USE_HOST_PTR))
4721 // Avoid sharing of host memory between OpenCL buffers
4722 && !(u->originalUMatData && u->originalUMatData->handle)
4725 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
4726 u->size, u->origdata, &retval);
4727 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
4728 (long long int)u->size, u->origdata, (void*)handle).c_str());
4730 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
4732 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
4733 u->size, u->origdata, &retval);
4734 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
4735 (long long int)u->size, u->origdata, (void*)handle).c_str());
4736 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
4739 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
4740 if(!handle || retval != CL_SUCCESS)
4743 u->prevAllocator = u->currAllocator;
4744 u->currAllocator = this;
4745 u->flags |= tempUMatFlags | flags0;
4746 u->allocatorFlags_ = allocatorFlags;
4748 if(accessFlags & ACCESS_WRITE)
4749 u->markHostCopyObsolete(true);
4750 opencl_allocator_stats.onAllocate(u->size);
4754 /*void sync(UMatData* u) const
4756 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4757 UMatDataAutoLock lock(u);
4759 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
4761 if( u->tempCopiedUMat() )
4763 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4764 u->size, u->origdata, 0, 0, 0);
4769 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4770 (CL_MAP_READ | CL_MAP_WRITE),
4771 0, u->size, 0, 0, 0, &retval);
4772 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4775 u->markHostCopyObsolete(false);
4777 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
4779 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4780 u->size, u->data, 0, 0, 0);
4784 void deallocate(UMatData* u) const CV_OVERRIDE
4789 CV_Assert(u->urefcount == 0);
4790 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
4792 CV_Assert(u->handle != 0);
4793 CV_Assert(u->mapcount == 0);
4795 if (u->flags & UMatData::ASYNC_CLEANUP)
4796 addToCleanupQueue(u);
4801 void deallocate_(UMatData* u) const
4804 CV_Assert(u->handle);
4805 if ((u->allocatorFlags_ & ALLOCATOR_FLAGS_EXTERNAL_BUFFER) == 0)
4807 opencl_allocator_stats.onFree(u->size);
4811 if (cv::__termination) // process is not in consistent state (after ExitProcess call) and terminating
4812 return; // avoid any OpenCL calls
4816 CV_Assert(u->origdata);
4817 // UMatDataAutoLock lock(u);
4819 if (u->hostCopyObsolete())
4821 #ifdef HAVE_OPENCL_SVM
4822 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4824 Context& ctx = Context::getDefault();
4825 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4826 CV_DbgAssert(svmFns->isValid());
4828 if( u->tempCopiedUMat() )
4830 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4831 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
4832 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
4833 cl_command_queue q = NULL;
4834 if (!isFineGrainBuffer)
4836 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
4837 q = (cl_command_queue)Queue::getDefault().ptr();
4838 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
4839 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
4842 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
4845 memcpy(u->origdata, u->handle, u->size);
4846 if (!isFineGrainBuffer)
4848 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
4849 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
4850 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4855 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
4862 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4863 if( u->tempCopiedUMat() )
4865 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
4866 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
4867 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
4874 CV_Assert(u->mapcount == 0);
4875 flushCleanupQueue(); // workaround for CL_OUT_OF_RESOURCES problem (#9960)
4876 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
4877 (CL_MAP_READ | CL_MAP_WRITE),
4878 0, u->size, 0, 0, 0, &retval);
4879 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
4880 CV_Assert(u->origdata == data && "Details: https://github.com/opencv/opencv/issues/6293");
4881 if (u->originalUMatData)
4883 CV_Assert(u->originalUMatData->data == data);
4885 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
4886 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
4887 CV_OCL_DBG_CHECK(clFinish(q));
4891 u->markHostCopyObsolete(false);
4897 #ifdef HAVE_OPENCL_SVM
4898 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4900 if( u->tempCopiedUMat() )
4902 Context& ctx = Context::getDefault();
4903 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4904 CV_DbgAssert(svmFns->isValid());
4906 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
4907 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
4913 cl_int retval = clReleaseMemObject((cl_mem)u->handle);
4914 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
4917 u->markDeviceCopyObsolete(true);
4918 u->currAllocator = u->prevAllocator;
4919 u->prevAllocator = NULL;
4920 if(u->data && u->copyOnMap() && u->data != u->origdata)
4922 u->data = u->origdata;
4923 u->currAllocator->deallocate(u);
4928 CV_Assert(u->origdata == NULL);
4929 if(u->data && u->copyOnMap() && u->data != u->origdata)
4933 u->markHostCopyObsolete(true);
4935 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
4937 bufferPool.release((cl_mem)u->handle);
4939 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
4941 bufferPoolHostPtr.release((cl_mem)u->handle);
4943 #ifdef HAVE_OPENCL_SVM
4944 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
4946 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
4950 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
4951 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
4953 Context& ctx = Context::getDefault();
4954 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
4955 CV_DbgAssert(svmFns->isValid());
4956 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4958 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
4960 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
4961 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
4962 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
4965 bufferPoolSVM.release((void*)u->handle);
4970 CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
4973 u->markDeviceCopyObsolete(true);
4977 CV_Assert(u == NULL);
4980 // synchronized call (external UMatDataAutoLock, see UMat::getMat)
4981 void map(UMatData* u, int accessFlags) const CV_OVERRIDE
4983 CV_Assert(u && u->handle);
4985 if(accessFlags & ACCESS_WRITE)
4986 u->markDeviceCopyObsolete(true);
4988 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
4991 if( !u->copyOnMap() )
4994 // because there can be other map requests for the same UMat with different access flags,
4995 // we use the universal (read-write) access mode.
4996 #ifdef HAVE_OPENCL_SVM
4997 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
4999 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5001 Context& ctx = Context::getDefault();
5002 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5003 CV_DbgAssert(svmFns->isValid());
5005 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
5007 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5008 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
5011 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5012 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
5016 u->data = (uchar*)u->handle;
5017 u->markHostCopyObsolete(false);
5018 u->markDeviceMemMapped(true);
5023 cl_int retval = CL_SUCCESS;
5024 if (!u->deviceMemMapped())
5026 CV_Assert(u->refcount == 1);
5027 CV_Assert(u->mapcount++ == 0);
5028 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
5029 (CL_MAP_READ | CL_MAP_WRITE),
5030 0, u->size, 0, 0, 0, &retval);
5031 CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
5033 if (u->data && retval == CL_SUCCESS)
5035 u->markHostCopyObsolete(false);
5036 u->markDeviceMemMapped(true);
5040 // TODO Is it really a good idea and was it tested well?
5041 // if map failed, switch to copy-on-map mode for the particular buffer
5042 u->flags |= UMatData::COPY_ON_MAP;
5047 u->data = (uchar*)fastMalloc(u->size);
5048 u->markHostCopyObsolete(true);
5052 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
5054 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5055 #ifdef HAVE_OPENCL_SVM
5056 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5058 cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5059 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5060 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5061 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5062 u->markHostCopyObsolete(false);
5066 void unmap(UMatData* u) const CV_OVERRIDE
5072 CV_Assert(u->handle != 0);
5074 UMatDataAutoLock autolock(u);
5076 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5078 if( !u->copyOnMap() && u->deviceMemMapped() )
5080 CV_Assert(u->data != NULL);
5081 #ifdef HAVE_OPENCL_SVM
5082 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5084 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5086 Context& ctx = Context::getDefault();
5087 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5088 CV_DbgAssert(svmFns->isValid());
5090 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
5092 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5093 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5095 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5097 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
5100 if (u->refcount == 0)
5102 u->markDeviceCopyObsolete(false);
5103 u->markHostCopyObsolete(true);
5107 if (u->refcount == 0)
5109 CV_Assert(u->mapcount-- == 1);
5110 retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
5111 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
5112 if (Device::getDefault().isAMD())
5114 // required for multithreaded applications (see stitching test)
5115 CV_OCL_DBG_CHECK(clFinish(q));
5117 u->markDeviceMemMapped(false);
5119 u->markDeviceCopyObsolete(false);
5120 u->markHostCopyObsolete(true);
5123 else if( u->copyOnMap() && u->deviceCopyObsolete() )
5125 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
5126 #ifdef HAVE_OPENCL_SVM
5127 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
5129 retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5130 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
5131 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
5132 (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5133 u->markDeviceCopyObsolete(false);
5134 u->markHostCopyObsolete(true);
5138 bool checkContinuous(int dims, const size_t sz[],
5139 const size_t srcofs[], const size_t srcstep[],
5140 const size_t dstofs[], const size_t dststep[],
5141 size_t& total, size_t new_sz[],
5142 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
5143 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
5145 bool iscontinuous = true;
5146 srcrawofs = srcofs ? srcofs[dims-1] : 0;
5147 dstrawofs = dstofs ? dstofs[dims-1] : 0;
5149 for( int i = dims-2; i >= 0; i-- )
5151 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
5152 iscontinuous = false;
5155 srcrawofs += srcofs[i]*srcstep[i];
5157 dstrawofs += dstofs[i]*dststep[i];
5162 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
5165 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
5166 // we assume that new_... arrays are initialized by caller
5167 // with 0's, so there is no else branch
5170 new_srcofs[0] = srcofs[1];
5171 new_srcofs[1] = srcofs[0];
5177 new_dstofs[0] = dstofs[1];
5178 new_dstofs[1] = dstofs[0];
5182 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
5183 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
5187 // we could check for dims == 3 here,
5188 // but from user perspective this one is more informative
5189 CV_Assert(dims <= 3);
5190 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
5193 new_srcofs[0] = srcofs[2];
5194 new_srcofs[1] = srcofs[1];
5195 new_srcofs[2] = srcofs[0];
5200 new_dstofs[0] = dstofs[2];
5201 new_dstofs[1] = dstofs[1];
5202 new_dstofs[2] = dstofs[0];
5205 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
5206 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
5209 return iscontinuous;
5212 void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
5213 const size_t srcofs[], const size_t srcstep[],
5214 const size_t dststep[]) const CV_OVERRIDE
5218 UMatDataAutoLock autolock(u);
5220 if( u->data && !u->hostCopyObsolete() )
5222 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
5225 CV_Assert( u->handle != 0 );
5227 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5229 size_t total = 0, new_sz[] = {0, 0, 0};
5230 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5231 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5233 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
5235 srcrawofs, new_srcofs, new_srcstep,
5236 dstrawofs, new_dstofs, new_dststep);
5238 #ifdef HAVE_OPENCL_SVM
5239 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5241 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5242 Context& ctx = Context::getDefault();
5243 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5244 CV_DbgAssert(svmFns->isValid());
5246 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5247 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5249 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5250 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
5253 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5258 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
5262 // This code is from MatAllocator::download()
5263 int isz[CV_MAX_DIM];
5264 uchar* srcptr = (uchar*)u->handle;
5265 for( int i = 0; i < dims; i++ )
5267 CV_Assert( sz[i] <= (size_t)INT_MAX );
5271 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5272 isz[i] = (int)sz[i];
5275 Mat src(dims, isz, CV_8U, srcptr, srcstep);
5276 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5278 const Mat* arrays[] = { &src, &dst };
5280 NAryMatIterator it(arrays, ptrs, 2);
5281 size_t j, planesz = it.size;
5283 for( j = 0; j < it.nplanes; j++, ++it )
5284 memcpy(ptrs[1], ptrs[0], planesz);
5286 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5288 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5289 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5291 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5300 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5301 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5302 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
5304 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5306 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5307 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5308 size_t membuf_ofs = srcrawofs - new_srcrawofs;
5309 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5310 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5311 uchar* ptr = alignedPtr.getAlignedPtr();
5313 CV_Assert(new_srcstep[0] >= new_sz[0]);
5314 total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
5315 total = std::min(total, u->size - new_srcrawofs);
5316 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5317 new_srcrawofs, total, ptr, 0, 0, 0));
5318 for( size_t i = 0; i < new_sz[1]; i++ )
5319 memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
5323 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5324 uchar* ptr = alignedPtr.getAlignedPtr();
5326 CV_OCL_CHECK(clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5327 new_srcofs, new_dstofs, new_sz,
5335 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
5336 const size_t dstofs[], const size_t dststep[],
5337 const size_t srcstep[]) const CV_OVERRIDE
5342 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5343 CV_Assert(u->refcount == 0 || u->tempUMat());
5345 size_t total = 0, new_sz[] = {0, 0, 0};
5346 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5347 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5349 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
5351 srcrawofs, new_srcofs, new_srcstep,
5352 dstrawofs, new_dstofs, new_dststep);
5354 UMatDataAutoLock autolock(u);
5356 // if there is cached CPU copy of the GPU matrix,
5357 // we could use it as a destination.
5358 // we can do it in 2 cases:
5359 // 1. we overwrite the whole content
5360 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date
5361 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
5363 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
5364 u->markHostCopyObsolete(false);
5365 u->markDeviceCopyObsolete(true);
5369 CV_Assert( u->handle != 0 );
5370 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5372 #ifdef HAVE_OPENCL_SVM
5373 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5375 CV_DbgAssert(u->data == NULL || u->data == u->handle);
5376 Context& ctx = Context::getDefault();
5377 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5378 CV_DbgAssert(svmFns->isValid());
5380 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
5381 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5383 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
5384 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
5387 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMap()");
5392 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
5396 // This code is from MatAllocator::upload()
5397 int isz[CV_MAX_DIM];
5398 uchar* dstptr = (uchar*)u->handle;
5399 for( int i = 0; i < dims; i++ )
5401 CV_Assert( sz[i] <= (size_t)INT_MAX );
5405 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5406 isz[i] = (int)sz[i];
5409 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
5410 Mat dst(dims, isz, CV_8U, dstptr, dststep);
5412 const Mat* arrays[] = { &src, &dst };
5414 NAryMatIterator it(arrays, ptrs, 2);
5415 size_t j, planesz = it.size;
5417 for( j = 0; j < it.nplanes; j++, ++it )
5418 memcpy(ptrs[1], ptrs[0], planesz);
5420 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
5422 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
5423 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
5425 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
5434 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
5435 cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5436 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
5437 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
5438 (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
5440 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5442 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5443 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5444 size_t membuf_ofs = dstrawofs - new_dstrawofs;
5445 AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
5446 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5447 uchar* ptr = alignedPtr.getAlignedPtr();
5449 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5450 total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
5451 total = std::min(total, u->size - new_dstrawofs);
5452 /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
5453 (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
5454 (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
5455 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5456 new_dstrawofs, total, ptr, 0, 0, 0));
5457 for( size_t i = 0; i < new_sz[1]; i++ )
5458 memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
5459 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5460 new_dstrawofs, total, ptr, 0, 0, 0));
5464 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
5465 uchar* ptr = alignedPtr.getAlignedPtr();
5467 CV_OCL_CHECK(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
5468 new_dstofs, new_srcofs, new_sz,
5474 u->markHostCopyObsolete(true);
5475 #ifdef HAVE_OPENCL_SVM
5476 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5477 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5484 u->markHostCopyObsolete(true);
5486 u->markDeviceCopyObsolete(false);
5489 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
5490 const size_t srcofs[], const size_t srcstep[],
5491 const size_t dstofs[], const size_t dststep[], bool _sync) const CV_OVERRIDE
5496 size_t total = 0, new_sz[] = {0, 0, 0};
5497 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
5498 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
5500 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
5502 srcrawofs, new_srcofs, new_srcstep,
5503 dstrawofs, new_dstofs, new_dststep);
5505 UMatDataAutoLock src_autolock(src, dst);
5507 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
5509 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5512 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
5514 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5515 dst->markHostCopyObsolete(false);
5516 #ifdef HAVE_OPENCL_SVM
5517 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5518 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5525 dst->markDeviceCopyObsolete(true);
5530 // there should be no user-visible CPU copies of the UMat which we are going to copy to
5531 CV_Assert(dst->refcount == 0);
5532 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5534 cl_int retval = CL_SUCCESS;
5535 #ifdef HAVE_OPENCL_SVM
5536 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
5537 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5539 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
5540 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5542 Context& ctx = Context::getDefault();
5543 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
5544 CV_DbgAssert(svmFns->isValid());
5548 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
5549 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
5550 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
5551 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
5552 total, 0, NULL, NULL);
5553 CV_OCL_CHECK_RESULT(status, "clEnqueueSVMMemcpy()");
5558 // This code is from MatAllocator::download()/upload()
5559 int isz[CV_MAX_DIM];
5560 uchar* srcptr = (uchar*)src->handle;
5561 for( int i = 0; i < dims; i++ )
5563 CV_Assert( sz[i] <= (size_t)INT_MAX );
5567 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
5568 isz[i] = (int)sz[i];
5570 Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
5572 uchar* dstptr = (uchar*)dst->handle;
5573 for( int i = 0; i < dims; i++ )
5576 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
5578 Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
5580 const Mat* arrays[] = { &m_src, &m_dst };
5582 NAryMatIterator it(arrays, ptrs, 2);
5583 size_t j, planesz = it.size;
5585 for( j = 0; j < it.nplanes; j++, ++it )
5586 memcpy(ptrs[1], ptrs[0], planesz);
5591 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
5593 map(src, ACCESS_READ);
5594 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
5599 map(dst, ACCESS_WRITE);
5600 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
5610 retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5611 srcrawofs, dstrawofs, total, 0, 0, 0);
5612 CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
5613 (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
5615 else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
5617 const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5618 size_t new_srcrawofs = srcrawofs & ~(padding-1);
5619 size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
5620 size_t new_dstrawofs = dstrawofs & ~(padding-1);
5621 size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
5623 AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5624 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5625 AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
5626 CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5627 uchar* srcptr = srcBuf.getAlignedPtr();
5628 uchar* dstptr = dstBuf.getAlignedPtr();
5630 CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5632 size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
5633 src_total = std::min(src_total, src->size - new_srcrawofs);
5634 size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
5635 dst_total = std::min(dst_total, dst->size - new_dstrawofs);
5637 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
5638 new_srcrawofs, src_total, srcptr, 0, 0, 0));
5639 CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5640 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5642 for( size_t i = 0; i < new_sz[1]; i++ )
5643 memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
5644 srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
5645 CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5646 new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5650 CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
5651 new_srcofs, new_dstofs, new_sz,
5657 if (retval == CL_SUCCESS)
5659 CV_IMPL_ADD(CV_IMPL_OCL)
5662 #ifdef HAVE_OPENCL_SVM
5663 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
5664 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
5671 dst->markHostCopyObsolete(true);
5673 dst->markDeviceCopyObsolete(false);
5677 CV_OCL_DBG_CHECK(clFinish(q));
5681 BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE {
5682 #ifdef HAVE_OPENCL_SVM
5683 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
5685 return &bufferPoolSVM;
5688 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
5690 return &bufferPoolHostPtr;
5692 if (id != NULL && strcmp(id, "OCL") != 0)
5694 CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
5699 MatAllocator* matStdAllocator;
5701 mutable cv::Mutex cleanupQueueMutex;
5702 mutable std::deque<UMatData*> cleanupQueue;
5704 void flushCleanupQueue() const
5706 if (!cleanupQueue.empty())
5708 std::deque<UMatData*> q;
5710 cv::AutoLock lock(cleanupQueueMutex);
5711 q.swap(cleanupQueue);
5713 for (std::deque<UMatData*>::const_iterator i = q.begin(); i != q.end(); ++i)
5719 void addToCleanupQueue(UMatData* u) const
5721 //TODO: Validation check: CV_Assert(!u->tempUMat());
5723 cv::AutoLock lock(cleanupQueueMutex);
5724 cleanupQueue.push_back(u);
5729 static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee
5731 static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide)
5732 g_isOpenCVActivated = true;
5735 MatAllocator* getOpenCLAllocator()
5737 CV_SINGLETON_LAZY_INIT(MatAllocator, getOpenCLAllocator_())
5740 }} // namespace cv::ocl
5745 // three funcs below are implemented in umatrix.cpp
5746 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
5747 bool autoSteps = false );
5748 void finalizeHdr(UMat& m);
5753 namespace cv { namespace ocl {
5756 // Convert OpenCL buffer memory to UMat
5758 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
5761 int sizes[] = { rows, cols };
5763 CV_Assert(0 <= d && d <= CV_MAX_DIM);
5767 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
5768 dst.usageFlags = USAGE_DEFAULT;
5770 setSize(dst, d, sizes, 0, true);
5773 cl_mem memobj = (cl_mem)cl_mem_buffer;
5774 cl_mem_object_type mem_type = 0;
5776 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5778 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
5781 CV_OCL_CHECK(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0));
5783 CV_OCL_CHECK(clRetainMemObject(memobj));
5785 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
5786 CV_Assert(total >= rows * step);
5788 // attach clBuffer to UMatData
5789 dst.u = new UMatData(getOpenCLAllocator());
5791 dst.u->allocatorFlags_ = OpenCLAllocator::ALLOCATOR_FLAGS_EXTERNAL_BUFFER; // not allocated from any OpenCV buffer pool
5793 dst.u->handle = cl_mem_buffer;
5794 dst.u->origdata = 0;
5795 dst.u->prevAllocator = 0;
5796 dst.u->size = total;
5802 } // convertFromBuffer()
5806 // Convert OpenCL image2d_t memory to UMat
5808 void convertFromImage(void* cl_mem_image, UMat& dst)
5810 cl_mem clImage = (cl_mem)cl_mem_image;
5811 cl_mem_object_type mem_type = 0;
5813 CV_OCL_CHECK(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0));
5815 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
5817 cl_image_format fmt = { 0, 0 };
5818 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0));
5821 switch (fmt.image_channel_data_type)
5824 case CL_UNSIGNED_INT8:
5829 case CL_SIGNED_INT8:
5833 case CL_UNORM_INT16:
5834 case CL_UNSIGNED_INT16:
5838 case CL_SNORM_INT16:
5839 case CL_SIGNED_INT16:
5843 case CL_SIGNED_INT32:
5852 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
5856 switch (fmt.image_channel_order)
5859 type = CV_MAKE_TYPE(depth, 1);
5865 type = CV_MAKE_TYPE(depth, 4);
5869 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
5874 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0));
5877 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0));
5880 CV_OCL_CHECK(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0));
5882 dst.create((int)h, (int)w, type);
5884 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
5886 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
5889 size_t src_origin[3] = { 0, 0, 0 };
5890 size_t region[3] = { w, h, 1 };
5891 CV_OCL_CHECK(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL));
5893 CV_OCL_CHECK(clFinish(q));
5896 } // convertFromImage()
5899 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
5901 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
5903 cl_uint numDevices = 0;
5904 cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
5905 if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
5907 CV_OCL_DBG_CHECK_RESULT(status,
5908 cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
5911 if (numDevices == 0)
5917 devices.resize((size_t)numDevices);
5918 CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, numDevices, &devices[0], &numDevices));
5921 struct PlatformInfo::Impl
5926 handle = *(cl_platform_id*)id;
5927 getDevices(devices, handle);
5930 String getStrProp(cl_platform_info prop) const
5934 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
5935 sz < sizeof(buf) ? String(buf) : String();
5938 IMPLEMENT_REFCOUNTABLE();
5939 std::vector<cl_device_id> devices;
5940 cl_platform_id handle;
5943 PlatformInfo::PlatformInfo()
5948 PlatformInfo::PlatformInfo(void* platform_id)
5950 p = new Impl(platform_id);
5953 PlatformInfo::~PlatformInfo()
5959 PlatformInfo::PlatformInfo(const PlatformInfo& i)
5966 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
5979 int PlatformInfo::deviceNumber() const
5981 return p ? (int)p->devices.size() : 0;
5984 void PlatformInfo::getDevice(Device& device, int d) const
5986 CV_Assert(p && d < (int)p->devices.size() );
5988 device.set(p->devices[d]);
5991 String PlatformInfo::name() const
5993 return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
5996 String PlatformInfo::vendor() const
5998 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
6001 String PlatformInfo::version() const
6003 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
6006 static void getPlatforms(std::vector<cl_platform_id>& platforms)
6008 cl_uint numPlatforms = 0;
6009 CV_OCL_DBG_CHECK(clGetPlatformIDs(0, NULL, &numPlatforms));
6011 if (numPlatforms == 0)
6017 platforms.resize((size_t)numPlatforms);
6018 CV_OCL_DBG_CHECK(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms));
6021 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
6023 std::vector<cl_platform_id> platforms;
6024 getPlatforms(platforms);
6026 for (size_t i = 0; i < platforms.size(); i++)
6027 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
6030 const char* typeToStr(int type)
6032 static const char* tab[]=
6034 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6035 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6036 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6037 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6038 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6039 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
6040 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
6041 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6043 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6044 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6047 const char* memopTypeToStr(int type)
6049 static const char* tab[] =
6051 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
6052 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
6053 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
6054 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
6055 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6056 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6057 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6058 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6060 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6061 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6064 const char* vecopTypeToStr(int type)
6066 static const char* tab[] =
6068 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6069 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
6070 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6071 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
6072 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6073 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
6074 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
6075 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
6077 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
6078 return cn > 16 ? "?" : tab[depth*16 + cn-1];
6081 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
6083 if( sdepth == ddepth )
6085 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
6086 if( ddepth >= CV_32F ||
6087 (ddepth == CV_32S && sdepth < CV_32S) ||
6088 (ddepth == CV_16S && sdepth <= CV_8S) ||
6089 (ddepth == CV_16U && sdepth == CV_8U))
6091 sprintf(buf, "convert_%s", typestr);
6093 else if( sdepth >= CV_32F )
6094 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
6096 sprintf(buf, "convert_%s_sat", typestr);
6101 const char* getOpenCLErrorString(int errorCode)
6103 #define CV_OCL_CODE(id) case id: return #id
6104 #define CV_OCL_CODE_(id, name) case id: return #name
6107 CV_OCL_CODE(CL_SUCCESS);
6108 CV_OCL_CODE(CL_DEVICE_NOT_FOUND);
6109 CV_OCL_CODE(CL_DEVICE_NOT_AVAILABLE);
6110 CV_OCL_CODE(CL_COMPILER_NOT_AVAILABLE);
6111 CV_OCL_CODE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
6112 CV_OCL_CODE(CL_OUT_OF_RESOURCES);
6113 CV_OCL_CODE(CL_OUT_OF_HOST_MEMORY);
6114 CV_OCL_CODE(CL_PROFILING_INFO_NOT_AVAILABLE);
6115 CV_OCL_CODE(CL_MEM_COPY_OVERLAP);
6116 CV_OCL_CODE(CL_IMAGE_FORMAT_MISMATCH);
6117 CV_OCL_CODE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
6118 CV_OCL_CODE(CL_BUILD_PROGRAM_FAILURE);
6119 CV_OCL_CODE(CL_MAP_FAILURE);
6120 CV_OCL_CODE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
6121 CV_OCL_CODE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
6122 CV_OCL_CODE(CL_COMPILE_PROGRAM_FAILURE);
6123 CV_OCL_CODE(CL_LINKER_NOT_AVAILABLE);
6124 CV_OCL_CODE(CL_LINK_PROGRAM_FAILURE);
6125 CV_OCL_CODE(CL_DEVICE_PARTITION_FAILED);
6126 CV_OCL_CODE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
6127 CV_OCL_CODE(CL_INVALID_VALUE);
6128 CV_OCL_CODE(CL_INVALID_DEVICE_TYPE);
6129 CV_OCL_CODE(CL_INVALID_PLATFORM);
6130 CV_OCL_CODE(CL_INVALID_DEVICE);
6131 CV_OCL_CODE(CL_INVALID_CONTEXT);
6132 CV_OCL_CODE(CL_INVALID_QUEUE_PROPERTIES);
6133 CV_OCL_CODE(CL_INVALID_COMMAND_QUEUE);
6134 CV_OCL_CODE(CL_INVALID_HOST_PTR);
6135 CV_OCL_CODE(CL_INVALID_MEM_OBJECT);
6136 CV_OCL_CODE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
6137 CV_OCL_CODE(CL_INVALID_IMAGE_SIZE);
6138 CV_OCL_CODE(CL_INVALID_SAMPLER);
6139 CV_OCL_CODE(CL_INVALID_BINARY);
6140 CV_OCL_CODE(CL_INVALID_BUILD_OPTIONS);
6141 CV_OCL_CODE(CL_INVALID_PROGRAM);
6142 CV_OCL_CODE(CL_INVALID_PROGRAM_EXECUTABLE);
6143 CV_OCL_CODE(CL_INVALID_KERNEL_NAME);
6144 CV_OCL_CODE(CL_INVALID_KERNEL_DEFINITION);
6145 CV_OCL_CODE(CL_INVALID_KERNEL);
6146 CV_OCL_CODE(CL_INVALID_ARG_INDEX);
6147 CV_OCL_CODE(CL_INVALID_ARG_VALUE);
6148 CV_OCL_CODE(CL_INVALID_ARG_SIZE);
6149 CV_OCL_CODE(CL_INVALID_KERNEL_ARGS);
6150 CV_OCL_CODE(CL_INVALID_WORK_DIMENSION);
6151 CV_OCL_CODE(CL_INVALID_WORK_GROUP_SIZE);
6152 CV_OCL_CODE(CL_INVALID_WORK_ITEM_SIZE);
6153 CV_OCL_CODE(CL_INVALID_GLOBAL_OFFSET);
6154 CV_OCL_CODE(CL_INVALID_EVENT_WAIT_LIST);
6155 CV_OCL_CODE(CL_INVALID_EVENT);
6156 CV_OCL_CODE(CL_INVALID_OPERATION);
6157 CV_OCL_CODE(CL_INVALID_GL_OBJECT);
6158 CV_OCL_CODE(CL_INVALID_BUFFER_SIZE);
6159 CV_OCL_CODE(CL_INVALID_MIP_LEVEL);
6160 CV_OCL_CODE(CL_INVALID_GLOBAL_WORK_SIZE);
6162 CV_OCL_CODE(CL_INVALID_PROPERTY);
6164 CV_OCL_CODE(CL_INVALID_IMAGE_DESCRIPTOR);
6165 CV_OCL_CODE(CL_INVALID_COMPILER_OPTIONS);
6166 CV_OCL_CODE(CL_INVALID_LINKER_OPTIONS);
6167 CV_OCL_CODE(CL_INVALID_DEVICE_PARTITION_COUNT);
6169 CV_OCL_CODE_(-69, CL_INVALID_PIPE_SIZE);
6170 CV_OCL_CODE_(-70, CL_INVALID_DEVICE_QUEUE);
6172 CV_OCL_CODE_(-1000, CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR);
6173 CV_OCL_CODE_(-1001, CL_PLATFORM_NOT_FOUND_KHR);
6174 CV_OCL_CODE_(-1002, CL_INVALID_D3D10_DEVICE_KHR);
6175 CV_OCL_CODE_(-1003, CL_INVALID_D3D10_RESOURCE_KHR);
6176 CV_OCL_CODE_(-1004, CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR);
6177 CV_OCL_CODE_(-1005, CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR);
6178 default: return "Unknown OpenCL error";
6184 template <typename T>
6185 static std::string kerToStr(const Mat & k)
6187 int width = k.cols - 1, depth = k.depth();
6188 const T * const data = k.ptr<T>();
6190 std::ostringstream stream;
6191 stream.precision(10);
6195 for (int i = 0; i < width; ++i)
6196 stream << "DIG(" << (int)data[i] << ")";
6197 stream << "DIG(" << (int)data[width] << ")";
6199 else if (depth == CV_32F)
6201 stream.setf(std::ios_base::showpoint);
6202 for (int i = 0; i < width; ++i)
6203 stream << "DIG(" << data[i] << "f)";
6204 stream << "DIG(" << data[width] << "f)";
6208 for (int i = 0; i < width; ++i)
6209 stream << "DIG(" << data[i] << ")";
6210 stream << "DIG(" << data[width] << ")";
6213 return stream.str();
6216 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
6218 Mat kernel = _kernel.getMat().reshape(1, 1);
6220 int depth = kernel.depth();
6224 if (ddepth != depth)
6225 kernel.convertTo(kernel, ddepth);
6227 typedef std::string (* func_t)(const Mat &);
6228 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
6229 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
6230 const func_t func = funcs[ddepth];
6231 CV_Assert(func != 0);
6233 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
6236 #define PROCESS_SRC(src) \
6241 CV_Assert(src.isMat() || src.isUMat()); \
6242 Size csize = src.size(); \
6243 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
6244 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
6245 if (cwidth < ckercn || ckercn <= 0) \
6247 cols.push_back(cwidth); \
6248 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
6250 offsets.push_back(src.offset()); \
6251 steps.push_back(src.step()); \
6252 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
6253 kercns.push_back(ckercn); \
6258 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
6259 InputArray src4, InputArray src5, InputArray src6,
6260 InputArray src7, InputArray src8, InputArray src9,
6261 OclVectorStrategy strat)
6263 const ocl::Device & d = ocl::Device::getDefault();
6265 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
6266 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
6267 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
6268 d.preferredVectorWidthDouble(), -1 };
6270 // if the device says don't use vectors
6271 if (vectorWidths[0] == 1)
6274 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
6275 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
6276 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
6279 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
6282 int checkOptimalVectorWidth(const int *vectorWidths,
6283 InputArray src1, InputArray src2, InputArray src3,
6284 InputArray src4, InputArray src5, InputArray src6,
6285 InputArray src7, InputArray src8, InputArray src9,
6286 OclVectorStrategy strat)
6288 CV_Assert(vectorWidths);
6290 int ref_type = src1.type();
6292 std::vector<size_t> offsets, steps, cols;
6293 std::vector<int> dividers, kercns;
6304 size_t size = offsets.size();
6306 for (size_t i = 0; i < size; ++i)
6307 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
6308 dividers[i] >>= 1, kercns[i] >>= 1;
6311 int kercn = *std::min_element(kercns.begin(), kercns.end());
6316 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
6317 InputArray src4, InputArray src5, InputArray src6,
6318 InputArray src7, InputArray src8, InputArray src9)
6320 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
6326 // TODO Make this as a method of OpenCL "BuildOptions" class
6327 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
6329 if (!buildOptions.empty())
6330 buildOptions += " ";
6331 int type = _m.type(), depth = CV_MAT_DEPTH(type);
6332 buildOptions += format(
6333 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
6334 name.c_str(), ocl::typeToStr(type),
6335 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
6336 name.c_str(), (int)CV_MAT_CN(type),
6337 name.c_str(), (int)CV_ELEM_SIZE(type),
6338 name.c_str(), (int)CV_ELEM_SIZE1(type),
6339 name.c_str(), (int)depth
6344 struct Image2D::Impl
6346 Impl(const UMat &src, bool norm, bool alias)
6350 init(src, norm, alias);
6356 clReleaseMemObject(handle);
6359 static cl_image_format getImageFormat(int depth, int cn, bool norm)
6361 cl_image_format format;
6362 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
6363 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
6364 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
6365 CL_SNORM_INT16, -1, -1, -1, -1 };
6366 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
6368 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
6369 int channelOrder = channelOrders[cn];
6370 format.image_channel_data_type = (cl_channel_type)channelType;
6371 format.image_channel_order = (cl_channel_order)channelOrder;
6375 static bool isFormatSupported(cl_image_format format)
6378 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6380 cl_context context = (cl_context)Context::getDefault().ptr();
6384 // Figure out how many formats are supported by this context.
6385 cl_uint numFormats = 0;
6386 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6387 CL_MEM_OBJECT_IMAGE2D, numFormats,
6389 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
6392 AutoBuffer<cl_image_format> formats(numFormats);
6393 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
6394 CL_MEM_OBJECT_IMAGE2D, numFormats,
6395 formats.data(), NULL);
6396 CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
6397 for (cl_uint i = 0; i < numFormats; ++i)
6399 if (!memcmp(&formats[i], &format, sizeof(format)))
6408 void init(const UMat &src, bool norm, bool alias)
6411 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
6413 CV_Assert(!src.empty());
6414 CV_Assert(ocl::Device::getDefault().imageSupport());
6416 int err, depth = src.depth(), cn = src.channels();
6418 cl_image_format format = getImageFormat(depth, cn, norm);
6420 if (!isFormatSupported(format))
6421 CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
6423 if (alias && !src.handle(ACCESS_RW))
6424 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
6426 cl_context context = (cl_context)Context::getDefault().ptr();
6427 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
6429 #ifdef CL_VERSION_1_2
6430 // this enables backwards portability to
6431 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
6432 const Device & d = ocl::Device::getDefault();
6433 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
6434 CV_Assert(!alias || canCreateAlias(src));
6435 if (1 < major || (1 == major && 2 <= minor))
6438 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
6439 desc.image_width = src.cols;
6440 desc.image_height = src.rows;
6441 desc.image_depth = 0;
6442 desc.image_array_size = 1;
6443 desc.image_row_pitch = alias ? src.step[0] : 0;
6444 desc.image_slice_pitch = 0;
6445 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
6446 desc.num_mip_levels = 0;
6447 desc.num_samples = 0;
6448 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
6453 CV_SUPPRESS_DEPRECATED_START
6454 CV_Assert(!alias); // This is an OpenCL 1.2 extension
6455 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
6456 CV_SUPPRESS_DEPRECATED_END
6458 CV_OCL_DBG_CHECK_RESULT(err, "clCreateImage()");
6460 size_t origin[] = { 0, 0, 0 };
6461 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
6464 if (!alias && !src.isContinuous())
6466 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
6467 CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
6468 (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
6471 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
6472 CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
6473 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL));
6474 CV_OCL_DBG_CHECK(clFlush(queue));
6478 devData = (cl_mem)src.handle(ACCESS_READ);
6480 CV_Assert(devData != NULL);
6484 CV_OCL_CHECK(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0));
6485 if (!src.isContinuous())
6487 CV_OCL_DBG_CHECK(clFlush(queue));
6488 CV_OCL_DBG_CHECK(clReleaseMemObject(devData));
6493 IMPLEMENT_REFCOUNTABLE();
6503 Image2D::Image2D(const UMat &src, bool norm, bool alias)
6505 p = new Impl(src, norm, alias);
6508 bool Image2D::canCreateAlias(const UMat &m)
6511 const Device & d = ocl::Device::getDefault();
6512 if (d.imageFromBufferSupport() && !m.empty())
6514 // This is the required pitch alignment in pixels
6515 uint pitchAlign = d.imagePitchAlignment();
6516 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
6518 // We don't currently handle the case where the buffer was created
6519 // with CL_MEM_USE_HOST_PTR
6520 if (!m.u->tempUMat())
6529 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
6531 cl_image_format format = Impl::getImageFormat(depth, cn, norm);
6533 return Impl::isFormatSupported(format);
6536 Image2D::Image2D(const Image2D & i)
6543 Image2D & Image2D::operator = (const Image2D & i)
6562 void* Image2D::ptr() const
6564 return p ? p->handle : 0;
6567 bool internal::isOpenCLForced()
6569 static bool initialized = false;
6570 static bool value = false;
6573 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
6579 bool internal::isPerformanceCheckBypassed()
6581 static bool initialized = false;
6582 static bool value = false;
6585 value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
6591 bool internal::isCLBuffer(UMat& u)
6593 void* h = u.handle(ACCESS_RW);
6596 CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
6598 if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
6601 cl_mem_object_type type = 0;
6602 cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
6603 if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
6613 Impl(const Queue& q)
6622 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6628 CV_OCL_DBG_CHECK(clFinish((cl_command_queue)queue.ptr()));
6632 uint64 durationNS() const
6634 return (uint64)(timer.getTimeSec() * 1e9);
6640 Timer::Timer(const Queue& q) : p(new Impl(q)) { }
6641 Timer::~Timer() { delete p; }
6655 uint64 Timer::durationNS() const
6658 return p->durationNS();
6663 #endif // HAVE_OPENCL