set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "Installation Directory")
endif(NOT CMAKE_TOOLCHAIN_FILE)
-# --------------------------------------------------------------
-# Top level OpenCV project
-# --------------------------------------------------------------
-if(CMAKE_GENERATOR MATCHES Xcode AND XCODE_VERSION VERSION_GREATER 4.3)
- cmake_minimum_required(VERSION 2.8.8)
-elseif(IOS)
- cmake_minimum_required(VERSION 2.8.0)
-else()
- cmake_minimum_required(VERSION 2.6.3)
-endif()
+ if(POLICY CMP0017)
+ cmake_policy(SET CMP0017 NEW)
+ endif()
+
if(POLICY CMP0022)
cmake_policy(SET CMP0022 OLD)
endif()
return()
endif()
-find_package(CUDA 4.2 QUIET)
+ set(CMAKE_MODULE_PATH "${OpenCV_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+
+ foreach(var INCLUDE LIBRARY PROGRAM)
+ set(__old_frpm_${var} "${CMAKE_FIND_ROOT_PATH_MODE_${var}}")
+ endforeach()
+
+ set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
+
+find_package(CUDA "${MIN_VER_CUDA}" QUIET)
+ foreach(var INCLUDE LIBRARY PROGRAM)
+ set(CMAKE_FIND_ROOT_PATH_MODE_${var} "${__old_frpm_${var}}")
+ endforeach()
+
+ list(REMOVE_AT CMAKE_MODULE_PATH 0)
+
if(CUDA_FOUND)
set(HAVE_CUDA 1)
set(HAVE_CUBLAS 1)
endif()
- if(${CUDA_VERSION} VERSION_LESS "5.5")
- find_cuda_helper_libs(npp)
- else()
- # hack for CUDA 5.5
- if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm")
- unset(CUDA_TOOLKIT_INCLUDE CACHE)
- unset(CUDA_CUDART_LIBRARY CACHE)
- unset(CUDA_cublas_LIBRARY CACHE)
- unset(CUDA_cufft_LIBRARY CACHE)
- unset(CUDA_npp_LIBRARY CACHE)
-
- if(SOFTFP)
- set(cuda_arm_path "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabi")
- else()
- set(cuda_arm_path "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf")
- endif()
-
- set(CUDA_TOOLKIT_INCLUDE "${cuda_arm_path}/include" CACHE PATH "include path")
- set(CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
-
- set(cuda_arm_library_path "${cuda_arm_path}/lib")
-
- set(CUDA_CUDART_LIBRARY "${cuda_arm_library_path}/libcudart.so" CACHE FILEPATH "cudart library")
- set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
- set(CUDA_cublas_LIBRARY "${cuda_arm_library_path}/libcublas.so" CACHE FILEPATH "cublas library")
- set(CUDA_cufft_LIBRARY "${cuda_arm_library_path}/libcufft.so" CACHE FILEPATH "cufft library")
- set(CUDA_nppc_LIBRARY "${cuda_arm_library_path}/libnppc.so" CACHE FILEPATH "nppc library")
- set(CUDA_nppi_LIBRARY "${cuda_arm_library_path}/libnppi.so" CACHE FILEPATH "nppi library")
- set(CUDA_npps_LIBRARY "${cuda_arm_library_path}/libnpps.so" CACHE FILEPATH "npps library")
- set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}" CACHE STRING "npp library")
- else()
- unset(CUDA_npp_LIBRARY CACHE)
-
- find_cuda_helper_libs(nppc)
- find_cuda_helper_libs(nppi)
- find_cuda_helper_libs(npps)
-
- set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}" CACHE STRING "npp library")
- endif()
- endif()
-
if(WITH_NVCUVID)
find_cuda_helper_libs(nvcuvid)
+ if(WIN32)
+ find_cuda_helper_libs(nvcuvenc)
+ endif()
set(HAVE_NVCUVID 1)
endif()
for (size_t j = 0; j < n; ++j)
{
- int tag = tags[j];
- stringstream s;
- s << tag;
+ int tag = tags[j];
+ stringstream s;
+ s << tag;
- const string filename = "output_"+s.str()+".avi";
+ const string filename = "output_"+s.str()+".avi";
- try
- {
- double fps = fps0;
- Size frame_s = Size(img_c, img_r);
-
- if( tag == VideoWriter::fourcc('H', '2', '6', '1') )
- frame_s = Size(352, 288);
- else if( tag == VideoWriter::fourcc('H', '2', '6', '3') )
- frame_s = Size(704, 576);
- /*else if( tag == CV_FOURCC('M', 'J', 'P', 'G') ||
- tag == CV_FOURCC('j', 'p', 'e', 'g') )
- frame_s = Size(1920, 1080);*/
-
- if( tag == VideoWriter::fourcc('M', 'P', 'E', 'G') )
+ try
{
- frame_s = Size(720, 576);
- fps = 25;
- }
-
- VideoWriter writer(filename, tag, fps, frame_s);
+ double fps = fps0;
+ Size frame_s = Size(img_c, img_r);
+
- if( tag == CV_FOURCC('H', '2', '6', '1') )
++ if( tag == VideoWriter::fourcc('H', '2', '6', '1') )
+ frame_s = Size(352, 288);
- else if( tag == CV_FOURCC('H', '2', '6', '3') )
++ else if( tag == VideoWriter::fourcc('H', '2', '6', '3') )
+ frame_s = Size(704, 576);
+ /*else if( tag == CV_FOURCC('M', 'J', 'P', 'G') ||
+ tag == CV_FOURCC('j', 'p', 'e', 'g') )
+ frame_s = Size(1920, 1080);*/
+
- if( tag == CV_FOURCC('M', 'P', 'E', 'G') )
++ if( tag == VideoWriter::fourcc('M', 'P', 'E', 'G') )
+ {
+ frame_s = Size(720, 576);
+ fps = 25;
+ }
- if (writer.isOpened() == false)
- {
- ts->printf(ts->LOG, "\n\nFile name: %s\n", filename.c_str());
- ts->printf(ts->LOG, "Codec id: %d Codec tag: %c%c%c%c\n", j,
- tag & 255, (tag >> 8) & 255, (tag >> 16) & 255, (tag >> 24) & 255);
- ts->printf(ts->LOG, "Error: cannot create video file.");
- ts->set_failed_test_info(ts->FAIL_INVALID_OUTPUT);
- }
- else
- {
- Mat img(frame_s, CV_8UC3, Scalar::all(0));
- const int coeff = cvRound(min(frame_s.width, frame_s.height)/(fps0 * time_sec));
+ VideoWriter writer(filename, tag, fps, frame_s);
- for (int i = 0 ; i < static_cast<int>(fps * time_sec); i++ )
+ if (writer.isOpened() == false)
{
- //circle(img, Point2i(img_c / 2, img_r / 2), min(img_r, img_c) / 2 * (i + 1), Scalar(255, 0, 0, 0), 2);
- rectangle(img, Point2i(coeff * i, coeff * i), Point2i(coeff * (i + 1), coeff * (i + 1)),
- Scalar::all(255 * (1.0 - static_cast<double>(i) / (fps * time_sec * 2) )), -1);
- writer << img;
+ ts->printf(ts->LOG, "\n\nFile name: %s\n", filename.c_str());
+ ts->printf(ts->LOG, "Codec id: %d Codec tag: %c%c%c%c\n", j,
+ tag & 255, (tag >> 8) & 255, (tag >> 16) & 255, (tag >> 24) & 255);
+ ts->printf(ts->LOG, "Error: cannot create video file.");
+ ts->set_failed_test_info(ts->FAIL_INVALID_OUTPUT);
+ }
+ else
+ {
+ Mat img(frame_s, CV_8UC3, Scalar::all(0));
+ const int coeff = cvRound(min(frame_s.width, frame_s.height)/(fps0 * time_sec));
+
+ for (int i = 0 ; i < static_cast<int>(fps * time_sec); i++ )
+ {
+ //circle(img, Point2i(img_c / 2, img_r / 2), min(img_r, img_c) / 2 * (i + 1), Scalar(255, 0, 0, 0), 2);
+ rectangle(img, Point2i(coeff * i, coeff * i), Point2i(coeff * (i + 1), coeff * (i + 1)),
+ Scalar::all(255 * (1.0 - static_cast<double>(i) / (fps * time_sec * 2) )), -1);
+ writer << img;
+ }
+
+ if (!created) created = true;
+ else remove(filename.c_str());
}
-
- if (!created) created = true;
- else remove(filename.c_str());
}
- }
- catch(...)
- {
- ts->set_failed_test_info(ts->FAIL_INVALID_OUTPUT);
- }
- ts->set_failed_test_info(cvtest::TS::OK);
-
+ catch(...)
+ {
+ ts->set_failed_test_info(ts->FAIL_INVALID_OUTPUT);
+ }
+ ts->set_failed_test_info(cvtest::TS::OK);
}
}
};
:param scale: The optional scale factor for the computed Laplacian values (by default, no scaling is applied).
+ :param delta: Optional delta value that is added to the results prior to storing them in ``dst`` . Supported value is 0 only.
+
+ :param bordertype: Pixel extrapolation method.
+
The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator.
+ocl::ConvolveBuf
+----------------
+.. ocv:struct:: ocl::ConvolveBuf
+
+Class providing a memory buffer for the :ocv:func:`ocl::convolve` function; it also allows some specific parameters to be adjusted. ::
+
+ struct CV_EXPORTS ConvolveBuf
+ {
+ Size result_size;
+ Size block_size;
+ Size user_block_size;
+ Size dft_size;
+ int spect_len;
+
+ oclMat image_spect, templ_spect, result_spect;
+ oclMat image_block, templ_block, result_data;
+
+ void create(Size image_size, Size templ_size);
+ static Size estimateBlockSize(Size result_size, Size templ_size);
+ };
+
+You can use the ``user_block_size`` field to set a specific block size for the :ocv:func:`ocl::convolve` function. If you leave it at its default value ``Size(0,0)``, the block size is estimated automatically (optimized for speed). By varying ``user_block_size`` you can reduce memory requirements at the cost of speed.
+
+ocl::ConvolveBuf::create
+------------------------
+.. ocv:function:: ocl::ConvolveBuf::create(Size image_size, Size templ_size)
+
+Constructs a buffer for the :ocv:func:`ocl::convolve` function from the given image and template sizes.
+
ocl::convolve
------------------
Returns void
--- /dev/null
- // and/or other oclMaterials provided with the distribution.
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
- FEATURE_CL_VER_1_2
++// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OCL_HPP__
+#define __OPENCV_OCL_HPP__
+
+#include <memory>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/ml.hpp"
+
+namespace cv
+{
+ namespace ocl
+ {
+ enum DeviceType
+ {
+ CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
+ CVCL_DEVICE_TYPE_CPU = (1 << 1),
+ CVCL_DEVICE_TYPE_GPU = (1 << 2),
+ CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
+ CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF
+ };
+
+ enum DevMemRW
+ {
+ DEVICE_MEM_R_W = 0,
+ DEVICE_MEM_R_ONLY,
+ DEVICE_MEM_W_ONLY
+ };
+
+ enum DevMemType
+ {
+ DEVICE_MEM_DEFAULT = 0,
+ DEVICE_MEM_AHP, //alloc host pointer
+ DEVICE_MEM_UHP, //use host pointer
+ DEVICE_MEM_CHP, //copy host pointer
+ DEVICE_MEM_PM //persistent memory
+ };
+
+ // these classes contain OpenCL runtime information
+
+ struct PlatformInfo;
+
+ struct DeviceInfo
+ {
+ public:
+ int _id; // reserved, don't use it
+
+ DeviceType deviceType;
+ std::string deviceProfile;
+ std::string deviceVersion;
+ std::string deviceName;
+ std::string deviceVendor;
+ int deviceVendorId;
+ std::string deviceDriverVersion;
+ std::string deviceExtensions;
+
+ size_t maxWorkGroupSize;
+ std::vector<size_t> maxWorkItemSizes;
+ int maxComputeUnits;
+ size_t localMemorySize;
+ size_t maxMemAllocSize;
+
+ int deviceVersionMajor;
+ int deviceVersionMinor;
+
+ bool haveDoubleSupport;
+ bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
++ bool isIntelDevice;
+
+ std::string compilationExtraOptions;
+
+ const PlatformInfo* platform;
+
+ DeviceInfo();
+ };
+
+ struct PlatformInfo
+ {
+ int _id; // reserved, don't use it
+
+ std::string platformProfile;
+ std::string platformVersion;
+ std::string platformName;
+ std::string platformVendor;
+ std::string platformExtensons;
+
+ int platformVersionMajor;
+ int platformVersionMinor;
+
+ std::vector<const DeviceInfo*> devices;
+
+ PlatformInfo();
+ };
+
+ //////////////////////////////// Initialization & Info ////////////////////////
+ typedef std::vector<const PlatformInfo*> PlatformsInfo;
+
+ CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
+
+ typedef std::vector<const DeviceInfo*> DevicesInfo;
+
+ CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
+ const PlatformInfo* platform = NULL);
+
+ // set device you want to use
+ CV_EXPORTS void setDevice(const DeviceInfo* info);
+
+ enum FEATURE_TYPE
+ {
+ FEATURE_CL_DOUBLE = 1,
+ FEATURE_CL_UNIFIED_MEM,
- // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
- CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
++ FEATURE_CL_VER_1_2,
++ FEATURE_CL_INTEL_DEVICE
+ };
+
+ // Represents OpenCL context, interface
+ class CV_EXPORTS Context
+ {
+ protected:
+ Context() { }
+ ~Context() { }
+ public:
+ static Context *getContext();
+
+ bool supportsFeature(FEATURE_TYPE featureType) const;
+ const DeviceInfo& getDeviceInfo() const;
+
+ const void* getOpenCLContextPtr() const;
+ const void* getOpenCLCommandQueuePtr() const;
+ const void* getOpenCLDeviceIDPtr() const;
+ };
+
+ inline const void *getClContextPtr()
+ {
+ return Context::getContext()->getOpenCLContextPtr();
+ }
+
+ inline const void *getClCommandQueuePtr()
+ {
+ return Context::getContext()->getOpenCLCommandQueuePtr();
+ }
+
+ CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType);
+
+ CV_EXPORTS void finish();
+
+ enum BINARY_CACHE_MODE
+ {
+ CACHE_NONE = 0, // do not cache OpenCL binary
+ CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode
+ CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode
+ CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // cache opencl binary
+ };
+ //! Enable or disable OpenCL program binary caching onto local disk
+ // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
+ // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
+ // binary file, which will be reused when the OpenCV executable is started again.
+ //
+ // This feature is enabled by default.
+ CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
+
+ //! sets the path where the binary cache is saved
+ CV_EXPORTS void setBinaryPath(const char *path);
+
+ struct ProgramSource
+ {
+ const char* name;
+ const char* programStr;
+ const char* programHash;
+
+ // Cache in memory by name (should be unique). Caching on disk disabled.
+ inline ProgramSource(const char* _name, const char* _programStr)
+ : name(_name), programStr(_programStr), programHash(NULL)
+ {
+ }
+
+ // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
+ inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
+ : name(_name), programStr(_programStr), programHash(_programHash)
+ {
+ }
+ };
+
+ //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ //! Deprecated, will be replaced
+ CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
+ const cv::ocl::ProgramSource& source, String kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args,
+ int channels, int depth, const char *build_options);
+
+ class CV_EXPORTS oclMatExpr;
+ //////////////////////////////// oclMat ////////////////////////////////
+ class CV_EXPORTS oclMat
+ {
+ public:
+ //! default constructor
+ oclMat();
+ //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ oclMat(int rows, int cols, int type);
+ oclMat(Size size, int type);
+ //! constructs oclMatrix and fills it with the specified value _s.
+ oclMat(int rows, int cols, int type, const Scalar &s);
+ oclMat(Size size, int type, const Scalar &s);
+ //! copy constructor
+ oclMat(const oclMat &m);
+
+ //! constructor for oclMatrix headers pointing to user-allocated data
+ oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+
+ //! creates a matrix header for a part of the bigger matrix
+ oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ oclMat(const oclMat &m, const Rect &roi);
+
+ //! builds oclMat from Mat. Performs a blocking upload to the device.
+ explicit oclMat (const Mat &m);
+
+ //! destructor - calls release()
+ ~oclMat();
+
+ //! assignment operators
+ oclMat &operator = (const oclMat &m);
+ //! assignment operator. Performs a blocking upload to the device.
+ oclMat &operator = (const Mat &m);
+ oclMat &operator = (const oclMatExpr& expr);
+
+ //! performs a blocking upload of data to oclMat.
+ void upload(const cv::Mat &m);
+
+
+ //! downloads data from device to host memory. Blocking calls.
+ operator Mat() const;
+ void download(cv::Mat &m) const;
+
+ //! convert to _InputArray
+ operator _InputArray();
+
+ //! convert to _OutputArray
+ operator _OutputArray();
+
+ //! returns a new oclMatrix header for the specified row
+ oclMat row(int y) const;
+ //! returns a new oclMatrix header for the specified column
+ oclMat col(int x) const;
+ //! ... for the specified row span
+ oclMat rowRange(int startrow, int endrow) const;
+ oclMat rowRange(const Range &r) const;
+ //! ... for the specified column span
+ oclMat colRange(int startcol, int endcol) const;
+ oclMat colRange(const Range &r) const;
+
+ //! returns deep copy of the oclMatrix, i.e. the data is copied
+ oclMat clone() const;
+
+ //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ // It calls m.create(this->size(), this->type()).
+ // It supports any data type
+ void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
+
+ //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+
+ void assignTo( oclMat &m, int type = -1 ) const;
+
+ //! sets every oclMatrix element to s
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& operator = (const Scalar &s);
+ //! sets some of the oclMatrix elements to s, according to the mask
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ //! creates alternative oclMatrix header for the same data, with different
+ // number of channels and/or different number of rows. see cvReshape.
+ oclMat reshape(int cn, int rows = 0) const;
+
+ //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ // previous data is unreferenced if needed.
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+
+ //! allocates new oclMatrix with specified device memory type.
+ void createEx(int rows, int cols, int type,
+ DevMemRW rw_type, DevMemType mem_type);
+ void createEx(Size size, int type, DevMemRW rw_type,
+ DevMemType mem_type);
+
+ //! decreases reference counter;
+ // deallocate the data when reference counter reaches 0.
+ void release();
+
+ //! swaps with other smart pointer
+ void swap(oclMat &mat);
+
+ //! locates oclMatrix header within a parent oclMatrix. See below
+ void locateROI( Size &wholeSize, Point &ofs ) const;
+ //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ //! extracts a rectangular sub-oclMatrix
+ // (this is a generalized form of row, rowRange etc.)
+ oclMat operator()( Range rowRange, Range colRange ) const;
+ oclMat operator()( const Rect &roi ) const;
+
+ oclMat& operator+=( const oclMat& m );
+ oclMat& operator-=( const oclMat& m );
+ oclMat& operator*=( const oclMat& m );
+ oclMat& operator/=( const oclMat& m );
+
+ //! returns true if the oclMatrix data is continuous
+ // (i.e. when there are no gaps between successive rows).
+ // similar to CV_IS_MAT_CONT(cvMat->type)
+ bool isContinuous() const;
+ //! returns element size in bytes,
+ // similar to CV_ELEM_SIZE(cvMat->type)
+ size_t elemSize() const;
+ //! returns the size of element channel in bytes.
+ size_t elemSize1() const;
+ //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ int type() const;
+ //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+ //! 3 channels element actually use 4 channel space
+ int ocltype() const;
+ //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+ int depth() const;
+ //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
+ int channels() const;
+ //! returns the number of channels used by ocl: 4 for a 3-channel element,
+ //! because a 3-channel element actually uses 4-channel space
+ int oclchannels() const;
+ //! returns step/elemSize1()
+ size_t step1() const;
+ //! returns oclMatrix size:
+ // width == number of columns, height == number of rows
+ Size size() const;
+ //! returns true if oclMatrix data is NULL
+ bool empty() const;
+
+ //! returns pointer to y-th row
+ uchar* ptr(int y = 0);
+ const uchar *ptr(int y = 0) const;
+
+ //! template version of the above method
+ template<typename _Tp> _Tp *ptr(int y = 0);
+ template<typename _Tp> const _Tp *ptr(int y = 0) const;
+
+ //! matrix transposition
+ oclMat t() const;
+
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+ //! the number of rows and columns
+ int rows, cols;
+ //! a distance between successive rows in bytes; includes the gap if any
+ size_t step;
+ //! pointer to the data(OCL memory object)
+ uchar *data;
+
+ //! pointer to the reference counter;
+ // when oclMatrix points to user-allocated data, the pointer is NULL
+ int *refcount;
+
+ //! helper fields used in locateROI and adjustROI
+ //datastart and dataend are not used in current version
+ uchar *datastart;
+ uchar *dataend;
+
+ //! OpenCL context associated with the oclMat object.
+ Context *clCxt; // TODO clCtx
+ //add offset for handle ROI, calculated in byte
+ int offset;
+ //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
+ int wholerows;
+ int wholecols;
+ };
+
+ // convert InputArray/OutputArray to oclMat references
+ CV_EXPORTS oclMat& getOclMatRef(InputArray src);
+ CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
+
+ ///////////////////// mat split and merge /////////////////////////////////
+ //! Compose a multi-channel array from several single-channel arrays
+ // Support all types
+ CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+
+ //! Divides multi-channel array into several single-channel arrays
+ // Support all types
+ CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+
+ ////////////////////////////// Arithmetics ///////////////////////////////////
+
+ //! adds one matrix to another with scale (dst = src1 * alpha + src2 * beta + gama)
+ // supports all data types
+ CV_EXPORTS void addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
+
+ //! adds one matrix to another (dst = src1 + src2)
+ // supports all data types
+ CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ //! adds scalar to a matrix (dst = src1 + s)
+ // supports all data types
+ CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! subtracts one matrix from another (dst = src1 - src2)
+ // supports all data types
+ CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ //! subtracts scalar from a matrix (dst = src1 - s)
+ // supports all data types
+ CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
+ // supports all data types
+ CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
+ //! multiplies a matrix by a number (dst = scalar * src)
+ // supports all data types
+ CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
+
+ //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
+ // supports all data types
+ CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
+ //! computes element-wise quotient of a number and an array (dst = scale / src)
+ // supports all data types
+ CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
+
+ //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
+ // supports all data types
+ CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);
+
+ //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
+ // supports all data types
+ CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);
+
+ //! compares elements of two arrays (dst = src1 <cmpop> src2)
+ // supports all data types
+ CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
+
+ //! transposes the matrix
+ // supports all data types
+ CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+
+ //! computes element-wise absolute values of an array (dst = abs(src))
+ // supports all data types
+ CV_EXPORTS void abs(const oclMat &src, oclMat &dst);
+
+ //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
+ // supports all data types
+ CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
+ //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
+ // supports all data types
+ CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
+
+ //! computes mean value and standard deviation of all or selected array elements
+ // supports all data types
+ CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+
+ //! computes norm of array
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports all data types
+ CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+
+ //! computes norm of the difference between two arrays
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports all data types
+ CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+
+ //! reverses the order of the rows, columns or both in a matrix
+ // supports all types
+ CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
+
+ //! computes sum of array elements
+ // support all types
+ CV_EXPORTS Scalar sum(const oclMat &m);
+ CV_EXPORTS Scalar absSum(const oclMat &m);
+ CV_EXPORTS Scalar sqrSum(const oclMat &m);
+
+ //! finds global minimum and maximum array elements and returns their values
+ // support all C1 types
+ CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+
+ //! finds global minimum and maximum array elements and returns their values with locations
+ // support all C1 types
+ CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ const oclMat &mask = oclMat());
+
+ //! counts non-zero array elements
+ // support all types
+ CV_EXPORTS int countNonZero(const oclMat &src);
+
+ //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+ // destination array will have the depth type as lut and the same channels number as source
+ //It supports 8UC1 8UC4 only
+ CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
+
+ //! only 8UC1 and 256 bins are supported now
+ CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ //! only 8UC1 and 256 bins are supported now
+ CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+
+ //! only 8UC1 is supported now
+ CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
+ //! bilateralFilter
+ // supports 8UC1 8UC4
+ CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+
+ //! Applies an adaptive bilateral filter to the input image
+ // This is not truly a bilateral filter. Instead of using user provided fixed parameters,
+ // the function calculates a constant at each window based on local standard deviation,
+ // and use this constant to do filtering.
+ // supports 8UC1, 8UC3
+ CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+
+ //! computes exponent of each matrix element (dst = e**src)
+ // supports only CV_32FC1, CV_64FC1 type
+ CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
+
+ //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
+ // supports only CV_32FC1, CV_64FC1 type
+ CV_EXPORTS void log(const oclMat &src, oclMat &dst);
+
+ //! computes magnitude of each (x(i), y(i)) vector
+ // supports only CV_32F, CV_64F type
+ CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+
+ //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ // supports only CV_32F, CV_64F type
+ CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+
+ //! the function raises every element of the input array to the power p
+ // support only CV_32F, CV_64F type
+ CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+
+ //! converts Cartesian coordinates to polar
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+
+ //! converts polar coordinates to Cartesian
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
+
+ //! performs per-element bit-wise inversion
+ // supports all types
+ CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+
+ //! calculates per-element bit-wise disjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! calculates per-element bit-wise conjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! calculates per-element bit-wise "exclusive or" operation
+ // supports all types
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! Logical operators
+ CV_EXPORTS oclMat operator ~ (const oclMat &);
+ CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+
+
+ //! Mathematics operators
+ CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+
+ struct CV_EXPORTS ConvolveBuf
+ {
+ Size result_size;
+ Size block_size;
+ Size user_block_size;
+ Size dft_size;
+
+ oclMat image_spect, templ_spect, result_spect;
+ oclMat image_block, templ_block, result_data;
+
+ void create(Size image_size, Size templ_size);
+ static Size estimateBlockSize(Size result_size, Size templ_size);
+ };
+
+ //! computes convolution of two images, may use discrete Fourier transform
+ // supports only CV_32FC1 type
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
+
+ //! Performs a per-element multiplication of two Fourier spectrums.
+ //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
+ //! supports only CV_32FC2 type
+ CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
+
+ CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);
+
+ //! initializes a scaled identity matrix
+ CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
+
+ //////////////////////////////// Filter Engine ////////////////////////////////
+
+ /*!
+ The Base Class for 1D or Row-wise Filters
+
+ This is the base class for linear or non-linear filters that process 1D data.
+ In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+ */
+ class CV_EXPORTS BaseRowFilter_GPU
+ {
+ public:
+ BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseRowFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Column-wise Filters
+
+ This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ Such filters are used for the "vertical" filtering parts in separable filters.
+ */
+ class CV_EXPORTS BaseColumnFilter_GPU
+ {
+ public:
+ BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseColumnFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Non-Separable 2D Filters.
+
+ This is the base class for linear or non-linear 2D filters.
+ */
+ class CV_EXPORTS BaseFilter_GPU
+ {
+ public:
+ BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ virtual ~BaseFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ Size ksize;
+ Point anchor;
+ int borderType;
+ };
+
+ /*!
+ The Base Class for Filter Engine.
+
+ The class can be used to apply an arbitrary filtering operation to an image.
+ It contains all the necessary intermediate buffers.
+ */
+ class CV_EXPORTS FilterEngine_GPU
+ {
+ public:
+ virtual ~FilterEngine_GPU() {}
+
+ virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ };
+
+ //! returns the non-separable filter engine with the specified filter
+ CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+
+ //! returns the primitive row filter with the specified kernel
+ CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT);
+
+ //! returns the primitive column filter with the specified kernel
+ CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+
+ //! returns the separable linear filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
+ const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! returns the separable filter engine with the specified filters
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter);
+
+ //! returns the Gaussian filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! returns filter engine for the generalized Sobel operator
+ CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
+
+ //! applies Laplacian operator to the image
- // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
++ // supports only ksize = 1 and ksize = 3
++ CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1,
++ double delta=0, int borderType=BORDER_DEFAULT);
+
+ //! returns 2D box filter
- // supports CV_8UC1 and CV_8UC4 types
++ // dst type must be the same as source type
+ CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns box filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns 2D filter with the specified kernel
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
++ // supports: dst type must be the same as source type
+ CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns the non-separable linear filter engine
++ // supports: dst type must be the same as source type
+ CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! smooths the image using the normalized box filter
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns 2D morphological filter
+ //! only MORPH_ERODE and MORPH_DILATE are supported
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ // kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
+ CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ Point anchor = Point(-1, -1));
+
+ //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int iterations = 1);
+
+ //! a synonym for normalized box filter
- // Note, at the moment this function only works when anchor point is in the kernel center
- // and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
+ static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ int borderType = BORDER_CONSTANT)
+ {
+ boxFilter(src, dst, -1, ksize, anchor, borderType);
+ }
+
+ //! applies non-separable 2D linear filter to the image
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
- CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers);
++ Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT);
+
+ //! applies separable 2D linear filter to the image
+ CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! applies generalized Sobel operator to the image
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
+ CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! applies the vertical or horizontal Scharr operator to the image
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
+ CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! smooths the image using Gaussian filter.
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
+ CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! erodes the image (applies the local minimum operator)
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! dilates the image (applies the local maximum operator)
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! applies an advanced morphological operation to the image
+ CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ ////////////////////////////// Image processing //////////////////////////////
+ //! Does mean shift filtering on GPU.
+ CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift procedure on GPU.
+ CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift segmentation with elimination of small regions.
+ CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! applies fixed threshold to the image.
+ // supports CV_8UC1 and CV_32FC1 data type
+ // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
+ CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+
+ //! resizes the image
+ // Supports INTER_NEAREST, INTER_LINEAR
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+
+ //! Applies a generic geometrical transformation to an image.
+
+ // Supports INTER_NEAREST, INTER_LINEAR.
+ // Map1 supports CV_16SC2, CV_32FC2 types.
+ // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+ CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+
+ //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+ // supports CV_8UC1, CV_8UC4, CV_32SC1 types
+ CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+
+ //! Smooths image using median filter
+ // The source must be a 1- or 4-channel image of depth CV_8U or CV_32F; m should be 3 or 5.
+ CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+
+ //! warps the image using affine transformation
+ // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+
+ //! warps the image using perspective transformation
+ // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+
+ //! computes the integral image and integral for the squared image
+ // sum will have CV_32S type, sqsum - CV_32F type
+ // supports only CV_8UC1 source type
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+ CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+
+
+ /////////////////////////////////// ML ///////////////////////////////////////////
+
+ //! Computes the closest center for each row in source and labels it with that center's index
+ // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
- CannyBuf() : counter(NULL) {}
++ // supports NORM_L1 and NORM_L2 distType (NOTE(review): the default is NORM_L2SQR - confirm it is accepted)
++ // if indices is provided, only the indexed rows will be calculated and their results are in the same
++ // order as indices
++ CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType = NORM_L2SQR, const oclMat &indices = oclMat());
+
+ //! Does k-means procedure on GPU
+ // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
+ CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
+ TermCriteria criteria, int attemps, int flags, oclMat &centers);
+
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ class CV_EXPORTS OclCascadeClassifier : public cv::CascadeClassifier
+ {
+ public:
+ void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
+ double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
+ Size minSize = Size(), Size maxSize = Size());
+ };
+
+ /////////////////////////////// Pyramid /////////////////////////////////////
+ CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+
+ //! upsamples the source image and then smooths it
+ CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+
+ //! performs linear blending of two images
+ //! to avoid accuracy errors the sum of weights shouldn't be very close to zero
+ // supports only CV_8UC1 source type
+ CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+
+ //! computes vertical sum, supports only CV_32FC1 images
+ CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+
+ ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ struct CV_EXPORTS MatchTemplateBuf
+ {
+ Size user_block_size;
+ oclMat imagef, templf;
+ std::vector<oclMat> images;
+ std::vector<oclMat> image_sums;
+ std::vector<oclMat> image_sqsums;
+ };
+
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+
+
+
+ ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+ struct CV_EXPORTS CannyBuf;
+
+ //! compute edges of the input image using Canny operator
+ // Support CV_8UC1 only
+ CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+ struct CV_EXPORTS CannyBuf
+ {
- explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
++ CannyBuf() : counter(1, 1, CV_32S) { }
+ ~CannyBuf()
+ {
+ release();
+ }
- void *counter;
++ explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
+ {
+ create(image_size, apperture_size);
+ }
+ CannyBuf(const oclMat &dx_, const oclMat &dy_);
+ void create(const Size &image_size, int apperture_size = 3);
+ void release();
+
+ oclMat dx, dy;
+ oclMat dx_buf, dy_buf;
+ oclMat magBuf, mapBuf;
+ oclMat trackBuf1, trackBuf2;
- CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
++ oclMat counter;
+ Ptr<FilterEngine_GPU> filterDX, filterDY;
+ };
+
+ ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ //! HoughCircles
+ struct HoughCirclesBuf
+ {
+ oclMat edges;
+ oclMat accum;
+ oclMat srcPoints;
+ oclMat centers;
+ CannyBuf cannyBuf;
+ };
+
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
+
+
+ ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ //! Param dft_size is the size of DFT transform.
+ //!
+ //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ // supports src types CV_32FC1, CV_32FC2
+ // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ // dft_size is the size of original input, which is used for transformation from complex to real.
+ // dft_size must be powers of 2, 3 and 5
+ // real to complex dft requires at least v1.8 clAmdFft
+ // real to complex dft output is not the same as the CPU version
+ // real to complex and complex to real do not support DFT_ROWS
+ CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
+
+ //! implements generalized matrix product algorithm GEMM from BLAS
+ // The functionality requires clAmdBlas library
+ // only support type CV_32FC1
+ // flag GEMM_3_T is not supported
+ CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ const oclMat &src3, double beta, oclMat &dst, int flags = 0);
+
+ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
+ struct CV_EXPORTS HOGDescriptor
+
+ {
+
+ enum { DEFAULT_WIN_SIGMA = -1 };
+
+ enum { DEFAULT_NLEVELS = 64 };
+
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+
+
+ HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+
+ Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+
+ int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+
+ double threshold_L2hys = 0.2, bool gamma_correction = true,
+
+ int nlevels = DEFAULT_NLEVELS);
+
+
+
+ size_t getDescriptorSize() const;
+
+ size_t getBlockHistogramSize() const;
+
+
+
+ void setSVMDetector(const std::vector<float> &detector);
+
+
+
+ static std::vector<float> getDefaultPeopleDetector();
+
+ static std::vector<float> getPeopleDetector48x96();
+
+ static std::vector<float> getPeopleDetector64x128();
+
+
+
+ void detect(const oclMat &img, std::vector<Point> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size());
+
+
+
+ void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size(), double scale0 = 1.05,
+
+ int group_threshold = 2);
+
+
+
+ void getDescriptors(const oclMat &img, Size win_stride,
+
+ oclMat &descriptors,
+
+ int descr_format = DESCR_FORMAT_COL_BY_COL);
+
+
+
+ Size win_size;
+
+ Size block_size;
+
+ Size block_stride;
+
+ Size cell_size;
+
+ int nbins;
+
+ double win_sigma;
+
+ double threshold_L2hys;
+
+ bool gamma_correction;
+
+ int nlevels;
+
+
+
+ protected:
+
+ // initialize buffers; only need to do once in case of multiscale detection
+
+ void init_buffer(const oclMat &img, Size win_stride);
+
+
+
+ void computeBlockHistograms(const oclMat &img);
+
+ void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+
+
+
+ double getWinSigma() const;
+
+ bool checkDetectorSize() const;
+
+
+
+ static int numPartsWithin(int size, int part_size, int stride);
+
+ static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+
+
+ // Coefficients of the separating plane
+
+ float free_coef;
+
+ oclMat detector;
+
+
+
+ // Results of the last classification step
+
+ oclMat labels;
+
+ Mat labels_host;
+
+
+
+ // Results of the last histogram evaluation step
+
+ oclMat block_hists;
+
+
+
+ // Gradients computation results
+
+ oclMat grad, qangle;
+
+
+
+ // scaled image
+
+ oclMat image_scale;
+
+
+
+ // effect size of input image (might be different from original size after scaling)
+
+ Size effect_size;
+
+ };
+
+
+ ////////////////////////feature2d_ocl/////////////////
+ /****************************************************************************************\
+ * Distance *
+ \****************************************************************************************/
+ template<typename T>
+ struct CV_EXPORTS Accumulator
+ {
+ typedef T Type;
+ };
+ template<> struct Accumulator<unsigned char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<unsigned short>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<short>
+ {
+ typedef float Type;
+ };
+
+ /*
+ * Manhattan distance (city block distance) functor
+ */
+ template<class T>
+ struct CV_EXPORTS L1
+ {
+ enum { normType = NORM_L1 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return normL1<ValueType, ResultType>(a, b, size);
+ }
+ };
+
+ /*
+ * Euclidean distance functor
+ */
+ template<class T>
+ struct CV_EXPORTS L2
+ {
+ enum { normType = NORM_L2 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+ }
+ };
+
+ /*
+ * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ * bit count of A exclusive XOR'ed with B
+ */
+ struct CV_EXPORTS Hamming
+ {
+ enum { normType = NORM_HAMMING };
+ typedef unsigned char ValueType;
+ typedef int ResultType;
+
+ /** this will count the bits in a ^ b
+ */
+ ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ {
+ return normHamming(a, b, size);
+ }
+ };
+
+ ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+
+ class CV_EXPORTS BruteForceMatcher_OCL_base
+ {
+ public:
+ enum DistType {L1Dist = 0, L2Dist, HammingDist};
+ explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+
+ // Add descriptors to train descriptor collection
+ void add(const std::vector<oclMat> &descCollection);
+
+ // Get train descriptors collection
+ const std::vector<oclMat> &getTrainDescriptors() const;
+
+ // Clear train descriptors collection
+ void clear();
+
+ // Return true if there are no train descriptors in collection
+ bool empty() const;
+
+ // Return true if the matcher supports mask in match methods
+ bool isMaskSupported() const;
+
+ // Find one best match for each query descriptor
+ void matchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx and distance and convert it to CPU vector with DMatch
+ static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ // Convert trainIdx and distance to vector with DMatch
+ static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+ // Find one best match for each query descriptor
+ void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+
+ // Make gpu collection of trains and masks in suitable format for matchCollection function
+ void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Find one best match from train collection for each query descriptor
+ void matchCollection(const oclMat &query, const oclMat &trainCollection,
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ const oclMat &masks = oclMat());
+
+ // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+ static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ // Convert trainIdx, imgIdx and distance to vector with DMatch
+ static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+ // Find one best match from train collection for each query descriptor.
+ void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Find k best matches for each query descriptor (in increasing order of distances)
+ void knnMatchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx and distance to vector with DMatch
+ static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ void knnMatch(const oclMat &query, const oclMat &train,
+ std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
+ bool compactResult = false);
+
+ // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+ void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ const oclMat &maskCollection = oclMat());
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx and distance to vector with DMatch
+ static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+ // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+ // Note: nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
+ // because it didn't have enough memory.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+ // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
+ // Matches doesn't sorted.
+ void radiusMatchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance
+ // (in increasing order of distances).
+ void radiusMatch(const oclMat &query, const oclMat &train,
+ std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ const oclMat &mask = oclMat(), bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+ // otherwise the user can pass pre-allocated trainIdx and distance with size nQuery x nMaxMatches.
+ // Matches are not sorted.
+ void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find best matches from train collection for each query descriptor which have distance less than
+ // maxDistance (in increasing order of distances).
+ void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+ DistType distType;
+
+ private:
+ std::vector<oclMat> trainDescCollection;
+ };
+
+ template <class Distance>
+ class CV_EXPORTS BruteForceMatcher_OCL; // declared here without a body; the specializations below supply the definitions
+
+ template <typename T>
+ class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base // specialization for L1<T>: constructs the base with L1Dist
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
+ explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} // the distance object itself is unused; only its type selects this overload
+ };
+ template <typename T>
+ class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base // specialization for L2<T>: constructs the base with L2Dist
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
+ explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} // the distance object itself is unused
+ };
+ template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base // full specialization for Hamming: constructs the base with HammingDist
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
+ explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} // the distance object itself is unused
+ };
+
+ class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base // matcher whose distance type is chosen at run time from a norm code
+ {
+ public:
+ explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} // NORM_L1 -> L1Dist, NORM_L2 -> L2Dist, any other value -> HammingDist
+ };
+
+ class CV_EXPORTS GoodFeaturesToTrackDetector_OCL // functor with public tuning fields and private oclMat scratch buffers
+ {
+ public:
+ explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+
+ //! returns a single-row matrix of type CV_32FC2 in corners
+ void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ //! downloads points of type Point2f into a vector; the vector's previous content is erased
+ void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
+
+ int maxCorners; // constructor default: 1000
+ double qualityLevel; // constructor default: 0.01
+ double minDistance; // constructor default: 0.0
+
+ int blockSize; // constructor default: 3
+ bool useHarrisDetector; // constructor default: false
+ double harrisK; // constructor default: 0.04
+ void releaseMemory() // releases all five private scratch buffers declared below
+ {
+ Dx_.release();
+ Dy_.release();
+ eig_.release();
+ minMaxbuf_.release();
+ tmpCorners_.release();
+ }
+ private:
+ oclMat Dx_; // scratch buffers; presumably reused between calls to operator() - confirm in the implementation
+ oclMat Dy_;
+ oclMat eig_;
+ oclMat minMaxbuf_;
+ oclMat tmpCorners_;
+ };
+
+ // Copies each constructor argument into the public field of the same name (argument names carry a trailing underscore).
+ inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ int blockSize_, bool useHarrisDetector_, double harrisK_)
+ : maxCorners(maxCorners_),
+ qualityLevel(qualityLevel_),
+ minDistance(minDistance_),
+ blockSize(blockSize_),
+ useHarrisDetector(useHarrisDetector_),
+ harrisK(harrisK_)
+ {
+ // initializers are listed in the members' declaration order; the oclMat buffers stay default-constructed
+ }
+
+ /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+
+ class CV_EXPORTS PyrLKOpticalFlow // optical-flow object exposing sparse() and dense(); tuning parameters are public fields
+ {
+ public:
+ PyrLKOpticalFlow() // assigns the default value of every public parameter and clears isDeviceArch11_
+ {
+ winSize = Size(21, 21);
+ maxLevel = 3;
+ iters = 30;
+ derivLambda = 0.5;
+ useInitialFlow = false;
+ minEigThreshold = 1e-4f;
+ getMinEigenVals = false;
+ isDeviceArch11_ = false;
+ }
+
+ void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, // flow for the given points; err is optional (defaults to null)
+ oclMat &status, oclMat *err = 0);
+
+ void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); // flow returned as two matrices u and v; err is optional
+
+ Size winSize; // default 21x21
+ int maxLevel; // default 3
+ int iters; // default 30
+ double derivLambda; // default 0.5
+ bool useInitialFlow; // default false
+ float minEigThreshold; // default 1e-4f
+ bool getMinEigenVals; // default false
+
+ void releaseMemory() // releases the derivative buffers and clears both pyramids; NOTE(review): uPyr_/vPyr_ are not touched here - confirm this is intended
+ {
+ dx_calcBuf_.release();
+ dy_calcBuf_.release();
+
+ prevPyr_.clear();
+ nextPyr_.clear();
+
+ dx_buf_.release();
+ dy_buf_.release();
+ }
+
+ private:
+ void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+
+ void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+
+ oclMat dx_calcBuf_;
+ oclMat dy_calcBuf_;
+
+ std::vector<oclMat> prevPyr_;
+ std::vector<oclMat> nextPyr_;
+
+ oclMat dx_buf_;
+ oclMat dy_buf_;
+
+ oclMat uPyr_[2]; // not released by releaseMemory()
+ oclMat vPyr_[2]; // not released by releaseMemory()
+
+ bool isDeviceArch11_; // initialised to false by the constructor
+ };
+
+ class CV_EXPORTS FarnebackOpticalFlow // optical-flow functor: public parameters, operator() computes flowx/flowy from two frames
+ {
+ public:
+ FarnebackOpticalFlow(); // defined out of line; default parameter values are not visible in this header
+
+ int numLevels;
+ double pyrScale;
+ bool fastPyramids;
+ int winSize;
+ int numIters;
+ int polyN;
+ double polySigma;
+ int flags;
+
+ void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy); // flow is returned as two separate matrices
+
+ void releaseMemory(); // defined out of line; presumably frees the private buffers below - confirm in the implementation
+
+ private:
+ void prepareGaussian(
+ int n, double sigma, float *g, float *xg, float *xxg,
+ double &ig11, double &ig03, double &ig33, double &ig55);
+
+ void setPolynomialExpansionConsts(int n, double sigma);
+
+ void updateFlow_boxFilter( // same signature as updateFlow_gaussianBlur below
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+ void updateFlow_gaussianBlur( // same signature as updateFlow_boxFilter above
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+ oclMat frames_[2];
+ oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+ std::vector<oclMat> pyramid0_, pyramid1_;
+ };
+
+ //////////////// build warping maps ////////////////////
+ //! builds plane warping maps (results in map_x and map_y)
+ CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds cylindrical warping maps (results in map_x and map_y)
+ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds spherical warping maps (results in map_x and map_y)
+ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds affine warping maps (results in xmap and ymap)
+ CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ //! builds perspective warping maps (results in xmap and ymap)
+ CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ //! Interpolates frames (images) using the provided optical flow (displacement field).
+ //! frame0 - frame 0 (32-bit floating point image, single channel)
+ //! frame1 - frame 1 (the same type and size)
+ //! fu - forward horizontal displacement
+ //! fv - forward vertical displacement
+ //! bu - backward horizontal displacement
+ //! bv - backward vertical displacement
+ //! pos - new frame position
+ //! newFrame - new frame
+ //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat:
+ //! occlusion masks 0, occlusion masks 1,
+ //! interpolated forward flow 0, interpolated forward flow 1,
+ //! interpolated backward flow 0, interpolated backward flow 1
+ //!
+ CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ const oclMat &fu, const oclMat &fv,
+ const oclMat &bu, const oclMat &bv,
+ float pos, oclMat &newFrame, oclMat &buf);
+
+ //! computes moments of the rasterized shape or a vector of points
++ //! contour should be a vector of points describing the contour
++ CV_EXPORTS Moments ocl_moments(InputArray contour);
++ //! src should be a general image uploaded to the GPU.
++ //! the supported oclMat types are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1;
++ //! to use CV_64FC1, the GPU must support CV_64FC1
++ CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary);
+
+ class CV_EXPORTS StereoBM_OCL // stereo correspondence functor: operator() produces a disparity map from a rectified pair
+ {
+ public:
+ enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; // values for the preset field / constructor argument
+
+ enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; // defaults used by the full constructor
+
+ //! the default constructor
+ StereoBM_OCL();
+ //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+ StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+
+ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+ //! Output disparity has CV_8U type.
+ void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+
+ //! Some heuristics that try to estimate
+ // whether the current GPU will be faster than the CPU for this algorithm.
+ // It queries the currently active device.
+ static bool checkIfGpuCallReasonable();
+
+ int preset;
+ int ndisp;
+ int winSize;
+
+ // If avergeTexThreshold == 0 => post-processing is disabled
+ // If avergeTexThreshold != 0 then disparity is set to 0 at each point (x,y) where for the left image
+ // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
+ // i.e. the input left image is low-textured.
+ float avergeTexThreshold;
+ private:
+ oclMat minSSD, leBuf, riBuf; // internal work buffers
+ };
+
+ class CV_EXPORTS StereoBeliefPropagation // stereo functor with public parameters; operator() accepts either an image pair or a precomputed data matrix
+ {
+ public:
+ enum { DEFAULT_NDISP = 64 };
+ enum { DEFAULT_ITERS = 5 };
+ enum { DEFAULT_LEVELS = 5 };
+ static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); // results returned through the three reference arguments
+ explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, // short constructor: msg_type defaults to CV_16S
+ int iters = DEFAULT_ITERS,
+ int levels = DEFAULT_LEVELS,
+ int msg_type = CV_16S);
+ StereoBeliefPropagation(int ndisp, int iters, int levels, // full constructor: note msg_type defaults to CV_32F here, unlike the short one
+ float max_data_term, float data_weight,
+ float max_disc_term, float disc_single_jump,
+ int msg_type = CV_32F);
+ void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); // disparity from a left/right image pair
+ void operator()(const oclMat &data, oclMat &disparity); // disparity from a single data matrix; presumably a precomputed data cost - confirm
+ int ndisp;
+ int iters;
+ int levels;
+ float max_data_term;
+ float data_weight;
+ float max_disc_term;
+ float disc_single_jump;
+ int msg_type;
+ private:
+ oclMat u, d, l, r, u2, d2, l2, r2; // internal buffers
+ std::vector<oclMat> datas;
+ oclMat out;
+ };
+
+ class CV_EXPORTS StereoConstantSpaceBP // stereo functor with public parameters; operator() produces a disparity map from an image pair
+ {
+ public:
+ enum { DEFAULT_NDISP = 128 };
+ enum { DEFAULT_ITERS = 8 };
+ enum { DEFAULT_LEVELS = 4 };
+ enum { DEFAULT_NR_PLANE = 4 };
+ static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); // results returned through the four reference arguments
+ explicit StereoConstantSpaceBP( // short constructor: every argument has a default
+ int ndisp = DEFAULT_NDISP,
+ int iters = DEFAULT_ITERS,
+ int levels = DEFAULT_LEVELS,
+ int nr_plane = DEFAULT_NR_PLANE,
+ int msg_type = CV_32F);
+ StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, // full constructor: also takes the cost terms and min_disp_th
+ float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
+ int min_disp_th = 0,
+ int msg_type = CV_32F);
+ void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ int ndisp;
+ int iters;
+ int levels;
+ int nr_plane;
+ float max_data_term;
+ float data_weight;
+ float max_disc_term;
+ float disc_single_jump;
+ int min_disp_th;
+ int msg_type;
+ bool use_local_init_data_cost; // not a constructor argument; must be set through the field
+ private:
+ oclMat u[2], d[2], l[2], r[2]; // internal buffers
+ oclMat disp_selected_pyr[2];
+ oclMat data_cost;
+ oclMat data_cost_selected;
+ oclMat temp;
+ oclMat out;
+ };
+
+ // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+ //
+ // see reference:
+ // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+ // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+ class CV_EXPORTS OpticalFlowDual_TVL1_OCL
+ {
+ public:
+ OpticalFlowDual_TVL1_OCL(); // defined out of line; default parameter values are not visible in this header
+
+ void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); // flow between I0 and I1, returned as two separate matrices
+
+ void collectGarbage(); // defined out of line; presumably releases the private buffers below - confirm in the implementation
+
+ /**
+ * Time step of the numerical scheme.
+ */
+ double tau;
+
+ /**
+ * Weight parameter for the data term, attachment parameter.
+ * This is the most relevant parameter, which determines the smoothness of the output.
+ * The smaller this parameter is, the smoother the solutions we obtain.
+ * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+ */
+ double lambda;
+
+ /**
+ * Weight parameter for (u - v)^2, tightness parameter.
+ * It serves as a link between the attachment and the regularization terms.
+ * In theory, it should have a small value in order to maintain both parts in correspondence.
+ * The method is stable for a large range of values of this parameter.
+ */
+ double theta;
+
+ /**
+ * Number of scales used to create the pyramid of images.
+ */
+ int nscales;
+
+ /**
+ * Number of warpings per scale.
+ * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+ * This is a parameter that assures the stability of the method.
+ * It also affects the running time, so it is a compromise between speed and accuracy.
+ */
+ int warps;
+
+ /**
+ * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+ * A small value will yield more accurate solutions at the expense of a slower convergence.
+ */
+ double epsilon;
+
+ /**
+ * Stopping criterion iterations number used in the numerical scheme.
+ */
+ int iterations;
+
+ bool useInitialFlow; // presumably makes operator() start from the incoming flowx/flowy - confirm in the implementation
+
+ private:
+ void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
+
+ std::vector<oclMat> I0s; // per-scale containers (one vector per image / flow component)
+ std::vector<oclMat> I1s;
+ std::vector<oclMat> u1s;
+ std::vector<oclMat> u2s;
+
+ oclMat I1x_buf; // the remaining members are internal work buffers
+ oclMat I1y_buf;
+
+ oclMat I1w_buf;
+ oclMat I1wx_buf;
+ oclMat I1wy_buf;
+
+ oclMat grad_buf;
+ oclMat rho_c_buf;
+
+ oclMat p11_buf;
+ oclMat p12_buf;
+ oclMat p21_buf;
+ oclMat p22_buf;
+
+ oclMat diff_buf;
+ oclMat norm_buf;
+ };
+ // currently supported sorting methods (passed as the method argument of sortByKey)
+ enum
+ {
+ SORT_BITONIC, // only supports power-of-2 buffer sizes
+ SORT_SELECTION, // cannot sort duplicate keys
+ SORT_MERGE,
+ SORT_RADIX // only supports signed int/float keys (CV_32S/CV_32F)
+ };
+ //! Returns the sorted result of all the elements in input based on equivalent keys.
+ //
+ // The element unit in the values to be sorted is determined from the data type,
+ // i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
+ // matrix dimensions.
+ // Both keys and values will be sorted in place.
+ // keys needs to be a single-channel oclMat.
+ //
+ // Example:
+ // input -
+ // keys = {2, 3, 1} (CV_8UC1)
+ // values = {10,5, 4,3, 6,2} (CV_8UC2)
+ // sortByKey(keys, values, SORT_SELECTION, false);
+ // output -
+ // keys = {1, 2, 3} (CV_8UC1)
+ // values = {6,2, 10,5, 4,3} (CV_8UC2)
+ CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+ /*!Base class for MOG and MOG2!*/
+ class CV_EXPORTS BackgroundSubtractor
+ {
+ public:
+ //! the virtual destructor
+ virtual ~BackgroundSubtractor();
+ //! the update operator that takes the next video frame and returns the current foreground mask as an 8-bit binary image.
+ virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate); // virtual but not pure: a base implementation is expected elsewhere
+
+ //! computes a background image
+ virtual void getBackgroundImage(oclMat& backgroundImage) const = 0; // pure virtual: must be provided by derived classes
+ };
+ /*!
+ Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+ The class implements the following algorithm:
+ "An improved adaptive background mixture model for real-time tracking with shadow detection"
+ P. KadewTraKuPong and R. Bowden,
+ Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+ http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+ */
+ class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+ {
+ public:
+ //! the default constructor
+ MOG(int nmixtures = -1);
+
+ //! re-initialization method
+ void initialize(Size frameSize, int frameType);
+
+ //! the update operator
+ void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f); // note: default learningRate is 0.f here (MOG2 uses -1.0f)
+
+ //! computes a background image which is the mean of all background gaussians
+ void getBackgroundImage(oclMat& backgroundImage) const;
+
+ //! releases all inner buffers
+ void release();
+
+ int history;
+ float varThreshold;
+ float backgroundRatio;
+ float noiseSigma;
+
+ private:
+ int nmixtures_;
+
+ Size frameSize_;
+ int frameType_;
+ int nframes_;
+
+ oclMat weight_; // internal model buffers
+ oclMat sortKey_;
+ oclMat mean_;
+ oclMat var_;
+ };
+
+ /*!
+ The class implements the following algorithm:
+ "Improved adaptive Gaussian mixture model for background subtraction"
+ Z.Zivkovic
+ International Conference Pattern Recognition, UK, August, 2004.
+ http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+ */
+ class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+ {
+ public:
+ //! the default constructor
+ MOG2(int nmixtures = -1);
+
+ //! re-initialization method
+ void initialize(Size frameSize, int frameType);
+
+ //! the update operator
+ void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f); // note: default learningRate is -1.0f here (MOG uses 0.f)
+
+ //! computes a background image which is the mean of all background gaussians
+ void getBackgroundImage(oclMat& backgroundImage) const;
+
+ //! releases all inner buffers
+ void release();
+
+ // parameters
+ // you should call initialize after changing parameters
+
+ int history;
+
+ //! NOTE(review): this remark is about the maximum allowed number of mixture components
+ //! (the actual number is determined dynamically per pixel); it does not describe varThreshold - presumably it belongs to nmixtures_.
+ float varThreshold;
+ // threshold on the squared Mahalanobis distance to decide if it is well described
+ // by the background model or not. Related to Cthr from the paper.
+ // This does not influence the update of the background. A typical value could be 4 sigma
+ // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+ /////////////////////////
+ // less important parameters - things you might change but be careful
+ ////////////////////////
+
+ float backgroundRatio;
+ // corresponds to fTB=1-cf from the paper
+ // TB - threshold when the component becomes significant enough to be included into
+ // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9
+ // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+ // it is considered foreground
+ // float noiseSigma;
+ float varThresholdGen;
+
+ //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
+ //when a sample is close to the existing components. If it is not close
+ //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+ //Smaller Tg leads to more generated components and higher Tg might
+ //lead to small number of components but they can grow too large
+ float fVarInit;
+ float fVarMin;
+ float fVarMax;
+
+ //initial variance for the newly generated components.
+ //It will influence the speed of adaptation. A good guess should be made.
+ //A simple way is to estimate the typical standard deviation from the images.
+ //I used here 10 as a reasonable value
+ // min and max can be used to further control the variance
+ float fCT; //CT - complexity reduction prior
+ //this is related to the number of samples needed to accept that a component
+ //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+ //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+ //shadow detection parameters
+ bool bShadowDetection; //default 1 - do shadow detection
+ unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+ float fTau;
+ // Tau - shadow threshold. The shadow is detected if the pixel is a darker
+ //version of the background. Tau is a threshold on how much darker the shadow can be.
+ //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+ //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+ private:
+ int nmixtures_;
+
+ Size frameSize_;
+ int frameType_;
+ int nframes_;
+
+ oclMat weight_;
+ oclMat variance_;
+ oclMat mean_;
+
+ oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+ };
+
+ /*!***************Kalman Filter*************!*/
+ class CV_EXPORTS KalmanFilter
+ {
+ public:
+ KalmanFilter();
+ //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
+ KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
+ //! re-initializes Kalman filter. The previous content is destroyed.
+ void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
+
+ const oclMat& predict(const oclMat& control=oclMat()); // control is optional (defaults to an empty oclMat); returns a const reference
+ const oclMat& correct(const oclMat& measurement); // returns a const reference
+
+ oclMat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
+ oclMat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
+ oclMat transitionMatrix; //!< state transition matrix (A)
+ oclMat controlMatrix; //!< control matrix (B) (not used if there is no control)
+ oclMat measurementMatrix; //!< measurement matrix (H)
+ oclMat processNoiseCov; //!< process noise covariance matrix (Q)
+ oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
+ oclMat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
+ oclMat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
+ oclMat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
+ private:
+ oclMat temp1; // temp1..temp5: internal temporaries
+ oclMat temp2;
+ oclMat temp3;
+ oclMat temp4;
+ oclMat temp5;
+ };
+
+ /*!***************K Nearest Neighbour*************!*/
+ class CV_EXPORTS KNearestNeighbour: public CvKNearest
+ {
+ public:
+ KNearestNeighbour();
+ ~KNearestNeighbour();
+
+ bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)), // sampleIdx defaults to a temporary empty Mat (bound via the reference setTo returns)
+ bool isRegression = false, int max_k = 32, bool updateBase = false);
+
+ void clear();
+
+ void find_nearest(const oclMat& samples, int k, oclMat& lables); // results are returned through the last argument (spelled "lables" in the signature)
+
+ private:
+ oclMat samples_ocl; // presumably the device-side copy of the training samples - confirm in the implementation
+ };
+
+ /*!*************** SVM *************!*/
+ class CV_EXPORTS CvSVM_OCL : public CvSVM
+ {
+ public:
+ CvSVM_OCL();
+
+ CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses, // constructor taking training data; varIdx, sampleIdx and params are optional
+ const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
+ CvSVMParams params=CvSVMParams());
+ CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const; // selects one row of src by row_index
+ CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const; // batch form: results are written to the output array
+ CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const; // single-sample form
+ float predict( const CvMat* samples, CV_OUT CvMat* results ) const; // C-API (CvMat) batch form; not marked CV_WRAP
+
+ protected:
+ float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const; // protected variant that additionally takes row_len
+ void create_kernel();
+ void create_solver();
+ };
+
+ /*!*************** END *************!*/
+ }
+}
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( push)
+# pragma warning( disable: 4267)
+#endif
+#include "opencv2/ocl/matrix_operations.hpp"
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( pop)
+#endif
+
+#endif /* __OPENCV_OCL_HPP__ */
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
--// and/or other oclMaterials provided with the distribution.
++// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
--// and/or other oclMaterials provided with the distribution.
++// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
--// and/or other oclMaterials provided with the distribution.
++// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
CV_EXPORTS cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
CV_EXPORTS void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
- const cv::ocl::ProgramEntry* source, std::string kernelName);
+ const cv::ocl::ProgramEntry* source, String kernelName);
CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
- const cv::ocl::ProgramEntry* source, std::string kernelName, const char *build_options);
+ const cv::ocl::ProgramEntry* source, String kernelName, const char *build_options);
// Kernel lookup / launch helpers. openCLGetKernelFromSource returns a cl_kernel given a
// ProgramEntry and a kernel name; the openCLExecuteKernel overloads below take either such a
// cl_kernel directly or a ProgramEntry plus kernel name. Kernel arguments are passed as a
// vector of (size, pointer) pairs. In this change the kernel-name parameter type goes from
// string / std::string to String.
+ CV_EXPORTS cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source,
- string kernelName, int channels, int depth, const char *build_options);
++ String kernelName, int channels, int depth, const char *build_options);
CV_EXPORTS void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
// Overload that launches an already obtained cl_kernel.
+ CV_EXPORTS void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args);
// Overload taking a column/row extent and a block size (default 16) instead of explicit thread arrays.
+CV_EXPORTS void openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, String kernelName, std::vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
-CV_EXPORTS void openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName,
+CV_EXPORTS void openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
// Overloads with explicit global/local thread arrays, without and with compiler build options.
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
+CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
-CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
+CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options);
--- /dev/null
- // and/or other oclMaterials provided with the distribution.
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
++// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cv;
+using namespace perf;
+
+//////////////////////////////////////////////////////////////////////
+// HoughCircles
+
// Test parameter tuple: (image size, dp, minDist). dp and minDist are forwarded unchanged
// to the HoughCircles calls below.
+typedef std::tr1::tuple<cv::Size, float, float> Size_Dp_MinDist_t;
+typedef perf::TestBaseWithParam<Size_Dp_MinDist_t> Size_Dp_MinDist;
+
// Performance test that times HoughCircles on a synthetic image of filled circles, using
// either the cv::ocl implementation or the plain cv:: implementation depending on which of
// RUN_OCL_IMPL / RUN_PLAIN_IMPL is set. Runs over every combination of 3 sizes, 3 dp values
// and 2 minDist values.
+PERF_TEST_P(Size_Dp_MinDist, OCL_HoughCircles,
+ testing::Combine(
+ testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p),
+ testing::Values(1.0f, 2.0f, 4.0f),
+ testing::Values(1.0f, 10.0f)))
+{
+ const Size_Dp_MinDist_t params = GetParam();
+ const cv::Size size = std::tr1::get<0>(params);
+ const float dp = std::tr1::get<1>(params);
+ const float minDist = std::tr1::get<2>(params);
+
// Fixed detector settings shared by both implementations.
+ const int minRadius = 10;
+ const int maxRadius = 30;
+ const int cannyThreshold = 100;
+ const int votesThreshold = 15;
+
// Constant seed, so the generated input does not vary between runs.
+ cv::RNG rng(123456789);
+
// Black single-channel 8-bit image; `circles` receives the output of the plain implementation.
+ cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)), circles;
+
// Draw a random number of white circles at random centers; thickness -1 requests filled circles.
+ const int numCircles = rng.uniform(50, 100);
+ for (int i = 0; i < numCircles; ++i)
+ {
+ cv::Point center(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
+ const int radius = rng.uniform(minRadius, maxRadius + 1);
+
+ cv::circle(src, center, radius, cv::Scalar::all(255), -1);
+ }
+
+ declare.time(10.0).iterations(25);
+
+ if (RUN_OCL_IMPL)
+ {
// The oclMat is constructed from src before the timed cycle starts.
+ cv::ocl::oclMat ocl_src(src), ocl_circles;
+
+ OCL_TEST_CYCLE() cv::ocl::HoughCircles(ocl_src, ocl_circles, HOUGH_GRADIENT, dp, minDist,
+ cannyThreshold, votesThreshold, minRadius, maxRadius);
+ }
+ else if (RUN_PLAIN_IMPL)
+ {
+ TEST_CYCLE() cv::HoughCircles(src, circles, HOUGH_GRADIENT, dp, minDist, cannyThreshold,
+ votesThreshold, minRadius, maxRadius);
+ }
+ else
+ OCL_PERF_ELSE
+
// The sanity check is fed a constant dummy value; the detected circles themselves are not compared.
+ int value = 0;
+ SANITY_CHECK(value);
+}
+
+#endif // HAVE_OPENCL
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { " ", " ", "2", "4", "4" };
- ostringstream stream;
+ std::ostringstream stream;
stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()];
- stream << " -D MAX_VAL=" << (WT)std::numeric_limits<T>::max();
- stream << " -D MIN_VAL=" << (std::numeric_limits<T>::is_integer ?
- (WT)std::numeric_limits<T>::min() : -(WT)(std::numeric_limits<T>::max()));
- if (numeric_limits<T>::is_integer)
++ if (std::numeric_limits<T>::is_integer)
+ {
- stream << " -D MAX_VAL=" << (WT)numeric_limits<T>::max();
- stream << " -D MIN_VAL=" << (WT)numeric_limits<T>::min();
++ stream << " -D MAX_VAL=" << (WT)std::numeric_limits<T>::max();
++ stream << " -D MIN_VAL=" << (WT)std::numeric_limits<T>::min();
+ }
+ else
+ stream << " -D DEPTH_" << src.depth();
std::string buildOptions = stream.str();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
int minvalid_cols = 0, moffset = 0;
if (!mask.empty())
////////////////////////////////// flip //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
- static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, String kernelName)
- {
- int channels = dst.oclchannels();
- int depth = dst.depth();
// Flip-type bit flags: bit 0 selects column flipping, bit 1 row flipping; FLIP_BOTH sets both bits.
+ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
- int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
- {4, 4, 4, 4, 1, 1, 1},
- {4, 4, 4, 4, 1, 1, 1},
- {4, 4, 4, 4, 1, 1, 1}
- };
-
- size_t vector_length = vector_lengths[channels - 1][depth];
- int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
-
- int cols = divUp(dst.cols * channels + offset_cols, vector_length);
- int rows = divUp(dst.rows, 2);
-
- size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { cols, rows, 1 };
-
- int dst_step1 = dst.cols * dst.elemSize();
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-
- openCLExecuteKernel(src.clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth);
- }
-
- static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, String kernelName, bool isVertical)
// Launches the kernel `kernelName` from the arithm_flip program to flip `src` into `dst`.
// `flipType` is compared against FLIP_COLS / FLIP_ROWS / FLIP_BOTH. The kernel is built with
// "-D T=<type><channels>" derived from dst's depth and oclchannels().
-static void arithmetic_flip_run(const oclMat &src, oclMat &dst, string kernelName, int flipType)
++static void arithmetic_flip_run(const oclMat &src, oclMat &dst, String kernelName, int flipType)
{
- int channels = dst.oclchannels();
- int depth = dst.depth();
+ int cols = dst.cols, rows = dst.rows;
// Degenerate extents along the flipped axis: the result equals the input, so just copy.
+ if ((cols == 1 && flipType == FLIP_COLS) ||
+ (rows == 1 && flipType == FLIP_ROWS) ||
+ (rows == 1 && cols == 1 && flipType == FLIP_BOTH))
+ {
+ src.copyTo(dst);
+ return;
+ }
- int vector_lengths[4][7] = {{1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1}
- };
// The dispatched extent is halved (rounded up) along one axis: columns for FLIP_COLS only,
// rows whenever the FLIP_ROWS bit is set (FLIP_ROWS and FLIP_BOTH).
// NOTE(review): presumably each work-item handles a mirrored pair of elements - confirm in the kernel.
+ cols = flipType == FLIP_COLS ? divUp(cols, 2) : cols;
+ rows = flipType & FLIP_ROWS ? divUp(rows, 2) : rows;
- size_t vector_length = vector_lengths[channels - 1][depth];
- int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
- int cols = divUp(dst.cols + offset_cols, vector_length);
- cols = isVertical ? cols : divUp(cols, 2);
- int rows = isVertical ? divUp(dst.rows, 2) : dst.rows;
// channelMap is indexed by oclchannels(), typeMap by depth(); index 3 maps to "4" like index 4.
+ const char * const channelMap[] = { "", "", "2", "4", "4" };
+ const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
+ std::string buildOptions = format("-D T=%s%s", typeMap[dst.depth()], channelMap[dst.oclchannels()]);
size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, rows, 1 };
- int dst_step1 = dst.cols * dst.elemSize();
// Steps and offsets are divided by src's element size before being handed to the kernel
// (dst values are divided by the same src element size).
+ int elemSize = src.elemSize();
+ int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
+ int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
+
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
// Kernel arguments, in order: src buffer/step/offset, dst buffer/step/offset, full dst
// rows/cols, then the (possibly halved) rows/cols used for the dispatch.
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
- if (isVertical)
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
- else
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
-
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-
- const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip;
-
- openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
// channels = -1 and depth = -1 are passed; the element type reaches the kernel via buildOptions instead.
+ openCLExecuteKernel(src.clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args,
+ -1, -1, buildOptions.c_str());
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
using namespace cv;
using namespace cv::ocl;
// Runs the "blendLinear" kernel of the blend_linear program on src1 and src2 with the
// per-pixel weight images weights1 and weights2, writing into dst. dst is (re)created with
// the size and type of src1.
- void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
- oclMat &result)
+ void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
+ oclMat &dst)
{
- cv::ocl::Context *ctx = img1.clCxt;
- CV_Assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
- int channels = img1.oclchannels();
- int depth = img1.depth();
- int rows = img1.rows;
- int cols = img1.cols;
- int istep = img1.step1();
- int wstep = weights1.step1();
- size_t globalSize[] = {cols * channels / 4, rows, 1};
- size_t localSize[] = {256, 1, 1};
// Preconditions: depth no greater than CV_32F, both sources of equal size and type, and
// both weight images CV_32FC1 with the same size as the sources.
+ CV_Assert(src1.depth() <= CV_32F);
+ CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+ CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
+ weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
+
+ dst.create(src1.size(), src1.type());
+
// The global range spans dst.cols x dst.rows with 16x16 local groups.
+ size_t globalSize[] = { dst.cols, dst.rows, 1};
+ size_t localSize[] = { 16, 16, 1 };
+
// Each matrix's step and offset are divided by that matrix's own element size.
+ int depth = dst.depth(), ocn = dst.oclchannels();
+ int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
+ int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
+ int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
+ int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
+ int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
+
// Build options define T (element type), FT (float type with the same channel suffix) and
// the conversion functions between them; "_sat_rte" is appended to convertToT only for
// depths below CV_32S.
+ const char * const channelMap[] = { "", "", "2", "4", "4" };
+ const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
+ std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
+ typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
+ depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
+ std::vector< std::pair<size_t, const void *> > args;
- result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
- if(globalSize[0] != 0)
- {
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data ));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img1.data ));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img2.data ));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights1.data ));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights2.data ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&istep ));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&wstep ));
- String kernelName = "BlendLinear";
// Kernel arguments: (buffer, offset, step) for src1, src2, weights1, weights2 and dst in
// that order, followed by dst rows and cols. Note that offset precedes step here.
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights1.data ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_step ));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&weights2.data ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_step ));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
- openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
- }
// channels = -1 and depth = -1 are passed; type information is supplied through buildOptions.
+ openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
+ -1, -1, buildOptions.c_str());
}
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
- ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
+ ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, magBuf);
+ ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, mapBuf);
- ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
- ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
-
- int counter_i [1] = { 0 };
- int err = 0;
- if(counter)
- {
- openCLFree(counter);
- }
- counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
- openCLSafeCall(err);
+ ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf1);
+ ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf2);
}
void cv::ocl::CannyBuf::release()
dy.release();
dx_buf.release();
dy_buf.release();
- edgeBuf.release();
+ magBuf.release();
+ mapBuf.release();
trackBuf1.release();
trackBuf2.release();
- if(counter)
- {
- openCLFree(counter);
- counter = NULL;
- }
}
namespace cv
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
// Launches the "edgesHysteresisLocal" kernel of the imgproc_canny program over a cols x rows
// range with 16x16 local groups. `counter` is now an oclMat (previously a raw void* buffer)
// and is reset to zero on the device before the launch.
- void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
+ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols)
{
Context *clCxt = map.clCxt;
- String kernelName = "edgesHysteresisLocal";
- vector< pair<size_t, const void *> > args;
+ std::vector< std::pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
// Zero the counter by uploading a host Mat of the same shape and type.
// NOTE(review): only element (0, 0) is written before the upload - this assumes counter
// holds a single int; confirm against where counter is allocated.
+ Mat counterMat(counter.rows, counter.cols, counter.type());
+ counterMat.at<int>(0, 0) = 0;
+ counter.upload(counterMat);
+
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
// map.step and map.offset are copied into cl_int locals, presumably so that the
// sizeof(cl_int) passed as the argument size matches the variable actually pointed to.
- args.push_back( make_pair( sizeof(cl_int), (void *)&stepBytes));
+ cl_int stepBytes = map.step;
- args.push_back( make_pair( sizeof(cl_int), (void *)&offsetBytes));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&stepBytes));
+ cl_int offsetBytes = map.offset;
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&offsetBytes));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisLocal", globalThreads, localThreads, args, -1, -1);
}
// Repeatedly launches the "edgesHysteresisGlobal" kernel of the imgproc_canny program until
// the device-side counter reads back as zero. Each iteration downloads the counter, resets it
// to zero on the device, launches the kernel sized from the downloaded count, and swaps st1/st2.
- void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
+ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols)
{
- unsigned int count;
- openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
- String kernelName = "edgesHysteresisGlobal";
- vector< pair<size_t, const void *> > args;
+ std::vector< std::pair<size_t, const void *> > args;
size_t localThreads[3] = {128, 1, 1};
- int count_i[1] = {0};
- while(count > 0)
// Always-true condition: the loop exits only through the `break` on count == 0 below.
+ while(1 > 0)
{
- openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
// Read the current counter value back to the host.
+ Mat counterMat; counter.download(counterMat);
+ int count = counterMat.at<int>(0, 0);
+ CV_Assert(count >= 0);
+ if (count == 0)
+ break;
+
// Reset the device counter to zero before the next launch.
+ counterMat.at<int>(0, 0) = 0;
+ counter.upload(counterMat);
args.clear();
- size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
// x extent: min(count, 65535) * 128; y extent: ceil(count / 65535).
+ size_t globalThreads[3] = {std::min((unsigned)count, 65535u) * 128, divUp(count, 65535), 1};
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&count));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st1.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st2.data));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&count));
// NOTE(review): map.step and map.offset are passed by address with sizeof(cl_int) here,
// whereas edgesHysteresisLocal_gpu first copies them into cl_int locals - confirm their type.
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
- openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
- openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
+ openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisGlobal", globalThreads, localThreads, args, -1, -1);
// Exchange the two buffers for the next iteration.
std::swap(st1, st2);
}
}
// Releases the OpenCL memory object `devPtr`.
// When CHECK_MEMORY_CORRUPTION is defined and devPtr is registered in __check_buffers, the
// function first reads __memory_corruption_check_bytes bytes from the start of the tracked
// main buffer and the same number of bytes starting at offset
// (__memory_corruption_check_bytes + data.size), compares both against the fill pattern,
// releases the main buffer, and after releasing devPtr reports any mismatch via std::cerr
// and/or CV_Error depending on the *_PRINT_ERROR / *_RAISE_ERROR macros.
void openCLFree(void *devPtr)
{
+ #ifdef CHECK_MEMORY_CORRUPTION
+ bool failBefore = false, failAfter = false;
+ CheckBuffers data;
+ std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
+ if (i != __check_buffers.end())
+ {
+ data = i->second;
+ Context* ctx = Context::getContext();
+ std::vector<uchar> checkBefore(__memory_corruption_check_bytes);
+ std::vector<uchar> checkAfter(__memory_corruption_check_bytes);
// Blocking reads (CL_TRUE) of the regions in front of and behind the data area.
+ openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
+ data.mainBuffer, CL_TRUE, 0, __memory_corruption_check_bytes, &checkBefore[0],
+ 0, NULL, NULL));
+ openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(ctx),
+ data.mainBuffer, CL_TRUE, __memory_corruption_check_bytes + data.size, __memory_corruption_check_bytes, &checkAfter[0],
+ 0, NULL, NULL));
+
// Reference block: the check pattern repeated as ints over the whole check length.
+ std::vector<int> tmp(__memory_corruption_check_bytes / sizeof(int),
+ __memory_corruption_check_pattern);
+
+ if (memcmp(&checkBefore[0], &tmp[0], __memory_corruption_check_bytes) != 0)
+ {
+ failBefore = true;
+ }
+ if (memcmp(&checkAfter[0], &tmp[0], __memory_corruption_check_bytes) != 0)
+ {
+ failAfter = true;
+ }
+ openCLSafeCall(clReleaseMemObject(data.mainBuffer));
+ __check_buffers.erase(i);
+ }
+ #endif
openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
// Reporting happens only after both release calls above, presumably so the memory is
// released even when CV_Error is raised.
+ #ifdef CHECK_MEMORY_CORRUPTION
+ if (failBefore)
+ {
+ #ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR
+ std::cerr << "ERROR: Memory corruption detected: before buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
+ #endif
+ #ifdef CHECK_MEMORY_CORRUPTION_RAISE_ERROR
+ CV_Error(CV_StsInternal, "Memory corruption detected: before buffer");
+ #endif
+ }
+ if (failAfter)
+ {
+ #ifdef CHECK_MEMORY_CORRUPTION_PRINT_ERROR
+ std::cerr << "ERROR: Memory corruption detected: after buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
+ #endif
+ #ifdef CHECK_MEMORY_CORRUPTION_RAISE_ERROR
+ CV_Error(CV_StsInternal, "Memory corruption detected: after buffer");
+ #endif
+ }
+ #endif
}
// Convenience overload: forwards to the four-argument openCLGetKernelFromSource with NULL
// as the last argument.
-cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName)
+cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName)
{
return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
}
return opt;
}
- void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
-cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, int channels,
++cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, int channels,
int depth, const char *build_options)
{
//construct kernel name
idxStr << "_C" << channels;
if(depth != -1)
idxStr << "_D" << depth;
- kernelName += idxStr.str();
+ kernelName = kernelName + idxStr.str();
- cl_kernel kernel;
std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options);
- kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
+ cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
+ return kernel;
+ }
- size_t localThreads[3], vector< pair<size_t, const void *> > &args)
+ void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
++ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args)
+ {
if ( localThreads != NULL)
{
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
openCLSafeCall(clReleaseKernel(kernel));
}
// Looks up the kernel for (source, kernelName, channels, depth, build_options)
// via openCLGetKernelFromSource and runs it through the cl_kernel overload of
// openCLExecuteKernel with the given global/local sizes and argument list.
// NOTE(review): the cl_kernel overload appears to release the kernel after the
// launch (clReleaseKernel is visible in its body) -- confirm before reusing it.
-void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
++void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
++ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
+ int depth, const char *build_options)
+ {
+ cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, channels, depth, build_options);
+
+ openCLExecuteKernel(ctx, kernel, globalThreads, localThreads, args);
+ }
+
-void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
+void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth)
+ std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
channels, depth, NULL);
else
kernel = _kernel;
- Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
- Ptr<MorphologyFilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
++ Ptr<MorphologyFilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations)
++ .staticCast<MorphologyFilterEngine_GPU>();
f->apply(src, dst);
}
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
(src.rows == dst.rows));
- CV_Assert((src.oclchannels() == dst.oclchannels()));
- CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
- CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
- CV_Assert(ksize.width == ksize.height);
- Context *clCxt = src.clCxt;
-
- int filterWidth = ksize.width;
- bool ksize_3x3 = filterWidth == 3 && src.type() != CV_32FC4 && src.type() != CV_32FC3; // CV_32FC4 is not tuned up with filter2d_3x3 kernel
+ CV_Assert(src.oclchannels() == dst.oclchannels());
- String kernelName = ksize_3x3 ? "filter2D_3x3" : "filter2D";
+ CV_Assert(kernel.cols == ksize.width && kernel.rows == ksize.height);
+ CV_Assert(kernel.channels() == 1);
- size_t src_offset_x = (src.offset % src.step) / src.elemSize();
- size_t src_offset_y = src.offset / src.step;
+ CV_Assert(anchor.x >= 0 && anchor.x < kernel.cols);
+ CV_Assert(anchor.y >= 0 && anchor.y < kernel.rows);
- size_t dst_offset_x = (dst.offset % dst.step) / dst.elemSize();
- size_t dst_offset_y = dst.offset / dst.step;
+ bool useDouble = src.depth() == CV_64F;
- int paddingPixels = filterWidth & (-2);
+ std::vector<float> kernelDataFloat;
+ std::vector<double> kernelDataDouble;
+ int kernel_size_y2_aligned = useDouble ?
+ _prepareKernelFilter2D<double>(kernelDataDouble, kernel)
+ : _prepareKernelFilter2D<float>(kernelDataFloat, kernel);
+ oclMat oclKernelParameter;
+ if (useDouble)
+ {
+ oclKernelParameter.createEx(1, kernelDataDouble.size(), CV_64FC1, DEVICE_MEM_R_ONLY, DEVICE_MEM_DEFAULT);
+ openCLMemcpy2D(src.clCxt, oclKernelParameter.data, kernelDataDouble.size()*sizeof(double),
+ &kernelDataDouble[0], kernelDataDouble.size()*sizeof(double),
+ kernelDataDouble.size()*sizeof(double), 1, clMemcpyHostToDevice);
+ }
+ else
+ {
+ oclKernelParameter.createEx(1, kernelDataFloat.size(), CV_32FC1, DEVICE_MEM_R_ONLY, DEVICE_MEM_DEFAULT);
+ openCLMemcpy2D(src.clCxt, oclKernelParameter.data, kernelDataFloat.size()*sizeof(float),
+ &kernelDataFloat[0], kernelDataFloat.size()*sizeof(float),
+ kernelDataFloat.size()*sizeof(float), 1, clMemcpyHostToDevice);
+ }
- size_t localThreads[3] = {ksize_3x3 ? 256 : 16, ksize_3x3 ? 1 : 16, 1};
- size_t globalThreads[3] = {src.wholecols, src.wholerows, 1};
+ size_t tryWorkItems = src.clCxt->getDeviceInfo().maxWorkItemSizes[0];
+ do {
+ size_t BLOCK_SIZE = tryWorkItems;
+ while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)src.cols * 2)
+ BLOCK_SIZE /= 2;
+ #if 1 // TODO Mode with several blocks requires a much more VGPRs, so this optimization is not actual for the current devices
+ size_t BLOCK_SIZE_Y = 1;
+ #else
+ size_t BLOCK_SIZE_Y = 8; // TODO Check heuristic value on devices
+ while (BLOCK_SIZE_Y < BLOCK_SIZE / 8 && BLOCK_SIZE_Y * src.clCxt->getDeviceInfo().maxComputeUnits * 32 < (size_t)src.rows)
+ BLOCK_SIZE_Y *= 2;
+ #endif
+
+ CV_Assert((size_t)ksize.width <= BLOCK_SIZE);
+
+ bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
+
- vector<pair<size_t , const void *> > args;
++ std::vector<std::pair<size_t , const void *> > args;
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
+ cl_uint stepBytes = src.step;
- args.push_back( make_pair( sizeof(cl_uint), (void *)&stepBytes));
++ args.push_back( std::make_pair( sizeof(cl_uint), (void *)&stepBytes));
+ int offsetXBytes = src.offset % src.step;
+ int offsetX = offsetXBytes / src.elemSize();
+ CV_Assert((int)(offsetX * src.elemSize()) == offsetXBytes);
+ int offsetY = src.offset / src.step;
+ int endX = (offsetX + src.cols);
+ int endY = (offsetY + src.rows);
+ cl_int rect[4] = {offsetX, offsetY, endX, endY};
+ if (!isIsolatedBorder)
+ {
+ rect[2] = src.wholecols;
+ rect[3] = src.wholerows;
+ }
- args.push_back( make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
++ args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
+ cl_uint _stepBytes = dst.step;
- args.push_back( make_pair( sizeof(cl_uint), (void *)&_stepBytes));
++ args.push_back( std::make_pair( sizeof(cl_uint), (void *)&_stepBytes));
+ int _offsetXBytes = dst.offset % dst.step;
+ int _offsetX = _offsetXBytes / dst.elemSize();
+ CV_Assert((int)(_offsetX * dst.elemSize()) == _offsetXBytes);
+ int _offsetY = dst.offset / dst.step;
+ int _endX = (_offsetX + dst.cols);
+ int _endY = (_offsetY + dst.rows);
+ cl_int _rect[4] = {_offsetX, _offsetY, _endX, _endY};
- args.push_back( make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
++ args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
+
+ float borderValue[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
+ double borderValueDouble[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
+ if ((borderType & ~BORDER_ISOLATED) == BORDER_CONSTANT)
+ {
+ if (useDouble)
- args.push_back( make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
++ args.push_back( std::make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
+ else
- args.push_back( make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
++ args.push_back( std::make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
+ }
- int cn = src.oclchannels();
- int src_step = (int)(src.step/src.elemSize());
- int dst_step = (int)(dst.step/src.elemSize());
- args.push_back( make_pair( sizeof(cl_mem), (void *)&oclKernelParameter.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&oclKernelParameter.data));
- int localWidth = localThreads[0] + paddingPixels;
- int localHeight = localThreads[1] + paddingPixels;
+ const char* btype = NULL;
- size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize();
+ switch (borderType & ~BORDER_ISOLATED)
+ {
+ case BORDER_CONSTANT:
+ btype = "BORDER_CONSTANT";
+ break;
+ case BORDER_REPLICATE:
+ btype = "BORDER_REPLICATE";
+ break;
+ case BORDER_REFLECT:
+ btype = "BORDER_REFLECT";
+ break;
+ case BORDER_WRAP:
+ CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
+ return;
+ case BORDER_REFLECT101:
+ btype = "BORDER_REFLECT_101";
+ break;
+ }
- int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
- {4, 4, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1},
- {4, 4, 4, 4, 1, 1, 4}
- };
- int cols = dst.cols + ((dst_offset_x) & (vector_lengths[cn - 1][src.depth()] - 1));
+ int requiredTop = anchor.y;
+ int requiredLeft = BLOCK_SIZE; // not this: anchor.x;
+ int requiredBottom = ksize.height - 1 - anchor.y;
+ int requiredRight = BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
+ int h = isIsolatedBorder ? src.rows : src.wholerows;
+ int w = isIsolatedBorder ? src.cols : src.wholecols;
+ bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
+
+ char build_options[1024];
+ sprintf(build_options, "-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
+ "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
+ "-D %s -D %s -D %s",
+ (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y,
+ src.depth(), src.oclchannels(), useDouble ? 1 : 0,
+ anchor.x, anchor.y, ksize.width, ksize.height, kernel_size_y2_aligned,
+ btype,
+ extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
+ isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
+
+ size_t lt[3] = {BLOCK_SIZE, 1, 1};
+ size_t gt[3] = {divUp(dst.cols, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE, divUp(dst.rows, BLOCK_SIZE_Y), 1};
+
+ cl_kernel kernel = openCLGetKernelFromSource(src.clCxt, &filtering_filter2D, "filter2D", -1, -1, build_options);
+
+ size_t kernelWorkGroupSize;
+ openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(src.clCxt),
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
+ if (lt[0] > kernelWorkGroupSize)
+ {
+ clReleaseKernel(kernel);
+ CV_Assert(BLOCK_SIZE > kernelWorkGroupSize);
+ tryWorkItems = kernelWorkGroupSize;
+ continue;
+ }
- std::vector< std::pair<size_t, const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_step));
- args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
- args.push_back(std::make_pair(localMemSize, (void *)NULL));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_x));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset_y));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_offset_x));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst_offset_y));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
- char btype[30];
- switch (borderType)
- {
- case 0:
- sprintf(btype, "BORDER_CONSTANT");
- break;
- case 1:
- sprintf(btype, "BORDER_REPLICATE");
- break;
- case 2:
- sprintf(btype, "BORDER_REFLECT");
- break;
- case 3:
- CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!");
- return;
- case 4:
- sprintf(btype, "BORDER_REFLECT_101");
- break;
- }
- int type = src.depth();
- char build_options[150];
- sprintf(build_options, "-D %s -D IMG_C_%d_%d -D CN=%d -D FILTER_SIZE=%d", btype, cn, type, cn, ksize.width);
- openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ openCLExecuteKernel(src.clCxt, kernel, gt, lt, args); // kernel will be released here
+ } while (false);
}
// Creates the non-separable 2D linear filter primitive.
// The new signature no longer names srcType/dstType (they are unused here); the
// host-side kernel Mat is handed to LinearFilter_GPU as-is together with the
// GPUFilter2D callback, and the anchor passed on is the normalized copy.
- Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int /*srcType*/, int /*dstType*/, const Mat &kernel, const Size &ksize,
const Point &anchor, int borderType)
{
- static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
-
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
-
- oclMat gpu_krnl;
// Work on a copy because 'anchor' is a const reference.
Point norm_archor = anchor;
- normalizeKernel(kernel, gpu_krnl, CV_32FC1);
// normalizeAnchor is defined elsewhere; presumably it resolves a (-1,-1)
// anchor to the kernel center -- TODO confirm.
normalizeAnchor(norm_archor, ksize);
- return makePtr<LinearFilter_GPU>(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
- borderType);
+ return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, norm_archor, kernel, GPUFilter2D,
+ borderType));
}
Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor,
// Wraps a row filter and a column filter into a single SeparableFilterEngine_GPU
// and returns it as a generic FilterEngine_GPU pointer.
Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
const Ptr<BaseColumnFilter_GPU> &columnFilter)
{
- return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
+ return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
}
- /*
- **data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
- **support four border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
- */
-
- static void GPUFilterBox_8u_C1R(const oclMat &src, oclMat &dst,
+ static void GPUFilterBox(const oclMat &src, oclMat &dst,
Size &ksize, const Point anchor, const int borderType)
{
//Normalize the result by default
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
(src.rows == dst.rows));
- Context *clCxt = src.clCxt;
-
- String kernelName = "boxFilter_C1_D0";
-
- char btype[30];
-
- switch (borderType)
- {
- case 0:
- sprintf(btype, "BORDER_CONSTANT");
- break;
- case 1:
- sprintf(btype, "BORDER_REPLICATE");
- break;
- case 2:
- sprintf(btype, "BORDER_REFLECT");
- break;
- case 3:
- CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!");
- return;
- case 4:
- sprintf(btype, "BORDER_REFLECT_101");
- break;
- }
-
- char build_options[150];
- sprintf(build_options, "-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", anchor.x, anchor.y, ksize.width, ksize.height, btype);
-
- size_t blockSizeX = 256, blockSizeY = 1;
- size_t gSize = blockSizeX - (ksize.width - 1);
- size_t threads = (dst.offset % dst.step % 4 + dst.cols + 3) / 4;
- size_t globalSizeX = threads % gSize == 0 ? threads / gSize * blockSizeX : (threads / gSize + 1) * blockSizeX;
- size_t globalSizeY = ((dst.rows + 1) / 2) % blockSizeY == 0 ? ((dst.rows + 1) / 2) : (((dst.rows + 1) / 2) / blockSizeY + 1) * blockSizeY;
-
- size_t globalThreads[3] = { globalSizeX, globalSizeY, 1 };
- size_t localThreads[3] = { blockSizeX, blockSizeY, 1 };
-
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
- args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-
- openCLExecuteKernel(clCxt, &filtering_boxFilter, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
- }
-
- static void GPUFilterBox_8u_C4R(const oclMat &src, oclMat &dst,
- Size &ksize, const Point anchor, const int borderType)
- {
- //Normalize the result by default
- float alpha = ksize.height * ksize.width;
-
- CV_Assert(src.clCxt == dst.clCxt);
- CV_Assert((src.cols == dst.cols) &&
- (src.rows == dst.rows));
- Context *clCxt = src.clCxt;
-
- String kernelName = "boxFilter_C4_D0";
-
- char btype[30];
-
- switch (borderType)
- {
- case 0:
- sprintf(btype, "BORDER_CONSTANT");
- break;
- case 1:
- sprintf(btype, "BORDER_REPLICATE");
- break;
- case 2:
- sprintf(btype, "BORDER_REFLECT");
- break;
- case 3:
- CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!");
- return;
- case 4:
- sprintf(btype, "BORDER_REFLECT_101");
- break;
- }
-
- char build_options[150];
- sprintf(build_options, "-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", anchor.x, anchor.y, ksize.width, ksize.height, btype);
-
- size_t blockSizeX = 256, blockSizeY = 1;
- size_t gSize = blockSizeX - ksize.width / 2 * 2;
- size_t globalSizeX = (src.cols) % gSize == 0 ? src.cols / gSize * blockSizeX : (src.cols / gSize + 1) * blockSizeX;
- size_t rows_per_thread = 2;
- size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ? ((src.rows + rows_per_thread - 1) / rows_per_thread) : (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
-
- size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
- size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
-
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
- args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-
- openCLExecuteKernel(clCxt, &filtering_boxFilter, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
- }
-
- static void GPUFilterBox_32F_C1R(const oclMat &src, oclMat &dst,
- Size &ksize, const Point anchor, const int borderType)
- {
- //Normalize the result by default
- float alpha = ksize.height * ksize.width;
-
- CV_Assert(src.clCxt == dst.clCxt);
- CV_Assert((src.cols == dst.cols) &&
- (src.rows == dst.rows));
- Context *clCxt = src.clCxt;
-
- String kernelName = "boxFilter_C1_D5";
-
- char btype[30];
-
- switch (borderType)
- {
- case 0:
- sprintf(btype, "BORDER_CONSTANT");
- break;
- case 1:
- sprintf(btype, "BORDER_REPLICATE");
- break;
- case 2:
- sprintf(btype, "BORDER_REFLECT");
- break;
- case 3:
- CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!");
- return;
- case 4:
- sprintf(btype, "BORDER_REFLECT_101");
- break;
- }
-
- char build_options[150];
- sprintf(build_options, "-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", anchor.x, anchor.y, ksize.width, ksize.height, btype);
-
- size_t blockSizeX = 256, blockSizeY = 1;
- size_t gSize = blockSizeX - ksize.width / 2 * 2;
- size_t globalSizeX = (src.cols) % gSize == 0 ? src.cols / gSize * blockSizeX : (src.cols / gSize + 1) * blockSizeX;
- size_t rows_per_thread = 2;
- size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ? ((src.rows + rows_per_thread - 1) / rows_per_thread) : (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
-
-
- size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
- size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
-
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
- args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
-
- openCLExecuteKernel(clCxt, &filtering_boxFilter, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
- }
-
- static void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst,
- Size &ksize, const Point anchor, const int borderType)
- {
- //Normalize the result by default
- float alpha = ksize.height * ksize.width;
-
- CV_Assert(src.clCxt == dst.clCxt);
- CV_Assert((src.cols == dst.cols) &&
- (src.rows == dst.rows));
- Context *clCxt = src.clCxt;
-
- String kernelName = "boxFilter_C4_D5";
-
- char btype[30];
-
- switch (borderType)
- {
- case 0:
- sprintf(btype, "BORDER_CONSTANT");
- break;
- case 1:
- sprintf(btype, "BORDER_REPLICATE");
- break;
- case 2:
- sprintf(btype, "BORDER_REFLECT");
- break;
- case 3:
- CV_Error(Error::StsUnsupportedFormat, "BORDER_WRAP is not supported!");
- return;
- case 4:
- sprintf(btype, "BORDER_REFLECT_101");
- break;
- }
+ CV_Assert(src.oclchannels() == dst.oclchannels());
- char build_options[150];
- sprintf(build_options, "-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s", anchor.x, anchor.y, ksize.width, ksize.height, btype);
+ size_t tryWorkItems = src.clCxt->getDeviceInfo().maxWorkItemSizes[0];
+ do {
+ size_t BLOCK_SIZE = tryWorkItems;
+ while (BLOCK_SIZE > 32 && BLOCK_SIZE >= (size_t)ksize.width * 2 && BLOCK_SIZE > (size_t)src.cols * 2)
+ BLOCK_SIZE /= 2;
+ size_t BLOCK_SIZE_Y = 8; // TODO Check heuristic value on devices
+ while (BLOCK_SIZE_Y < BLOCK_SIZE / 8 && BLOCK_SIZE_Y * src.clCxt->getDeviceInfo().maxComputeUnits * 32 < (size_t)src.rows)
+ BLOCK_SIZE_Y *= 2;
+
+ CV_Assert((size_t)ksize.width <= BLOCK_SIZE);
+
+ bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
+
- vector<pair<size_t , const void *> > args;
++ std::vector<std::pair<size_t , const void *> > args;
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
+ cl_uint stepBytes = src.step;
- args.push_back( make_pair( sizeof(cl_uint), (void *)&stepBytes));
++ args.push_back( std::make_pair( sizeof(cl_uint), (void *)&stepBytes));
+ int offsetXBytes = src.offset % src.step;
+ int offsetX = offsetXBytes / src.elemSize();
+ CV_Assert((int)(offsetX * src.elemSize()) == offsetXBytes);
+ int offsetY = src.offset / src.step;
+ int endX = (offsetX + src.cols);
+ int endY = (offsetY + src.rows);
+ cl_int rect[4] = {offsetX, offsetY, endX, endY};
+ if (!isIsolatedBorder)
+ {
+ rect[2] = src.wholecols;
+ rect[3] = src.wholerows;
+ }
- args.push_back( make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
++ args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&rect[0]));
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
+ cl_uint _stepBytes = dst.step;
- args.push_back( make_pair( sizeof(cl_uint), (void *)&_stepBytes));
++ args.push_back( std::make_pair( sizeof(cl_uint), (void *)&_stepBytes));
+ int _offsetXBytes = dst.offset % dst.step;
+ int _offsetX = _offsetXBytes / dst.elemSize();
+ CV_Assert((int)(_offsetX * dst.elemSize()) == _offsetXBytes);
+ int _offsetY = dst.offset / dst.step;
+ int _endX = (_offsetX + dst.cols);
+ int _endY = (_offsetY + dst.rows);
+ cl_int _rect[4] = {_offsetX, _offsetY, _endX, _endY};
- args.push_back( make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
++ args.push_back( std::make_pair( sizeof(cl_int)*4, (void *)&_rect[0]));
+
+ bool useDouble = src.depth() == CV_64F;
+
+ float borderValue[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
+ double borderValueDouble[4] = {0, 0, 0, 0}; // DON'T move into 'if' body
+ if ((borderType & ~BORDER_ISOLATED) == BORDER_CONSTANT)
+ {
+ if (useDouble)
- args.push_back( make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
++ args.push_back( std::make_pair( sizeof(double) * src.oclchannels(), (void *)&borderValue[0]));
+ else
- args.push_back( make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
++ args.push_back( std::make_pair( sizeof(float) * src.oclchannels(), (void *)&borderValueDouble[0]));
+ }
- size_t blockSizeX = 256, blockSizeY = 1;
- size_t gSize = blockSizeX - ksize.width / 2 * 2;
- size_t globalSizeX = (src.cols) % gSize == 0 ? src.cols / gSize * blockSizeX : (src.cols / gSize + 1) * blockSizeX;
- size_t rows_per_thread = 2;
- size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ? ((src.rows + rows_per_thread - 1) / rows_per_thread) : (((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
+ double alphaDouble = alpha; // DON'T move into 'if' body
+ if (useDouble)
- args.push_back( make_pair( sizeof(double), (void *)&alphaDouble));
++ args.push_back( std::make_pair( sizeof(double), (void *)&alphaDouble));
+ else
- args.push_back( make_pair( sizeof(float), (void *)&alpha));
++ args.push_back( std::make_pair( sizeof(float), (void *)&alpha));
+ const char* btype = NULL;
- size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
- size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
+ switch (borderType & ~BORDER_ISOLATED)
+ {
+ case BORDER_CONSTANT:
+ btype = "BORDER_CONSTANT";
+ break;
+ case BORDER_REPLICATE:
+ btype = "BORDER_REPLICATE";
+ break;
+ case BORDER_REFLECT:
+ btype = "BORDER_REFLECT";
+ break;
+ case BORDER_WRAP:
+ CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
+ return;
+ case BORDER_REFLECT101:
+ btype = "BORDER_REFLECT_101";
+ break;
+ }
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
- args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ int requiredTop = anchor.y;
+ int requiredLeft = BLOCK_SIZE; // not this: anchor.x;
+ int requiredBottom = ksize.height - 1 - anchor.y;
+ int requiredRight = BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
+ int h = isIsolatedBorder ? src.rows : src.wholerows;
+ int w = isIsolatedBorder ? src.cols : src.wholecols;
+ bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
+
+ CV_Assert(w >= ksize.width && h >= ksize.height); // TODO Other cases are not tested well
+
+ char build_options[1024];
+ sprintf(build_options, "-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s",
+ (int)BLOCK_SIZE, (int)BLOCK_SIZE_Y,
+ src.depth(), src.oclchannels(), useDouble ? 1 : 0,
+ anchor.x, anchor.y, ksize.width, ksize.height,
+ btype,
+ extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
+ isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
+
+ size_t lt[3] = {BLOCK_SIZE, 1, 1};
+ size_t gt[3] = {divUp(dst.cols, BLOCK_SIZE - (ksize.width - 1)) * BLOCK_SIZE, divUp(dst.rows, BLOCK_SIZE_Y), 1};
+
+ cl_kernel kernel = openCLGetKernelFromSource(src.clCxt, &filtering_boxFilter, "boxFilter", -1, -1, build_options);
+
+ size_t kernelWorkGroupSize;
+ openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(src.clCxt),
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
+ if (lt[0] > kernelWorkGroupSize)
+ {
+ clReleaseKernel(kernel);
+ CV_Assert(BLOCK_SIZE > kernelWorkGroupSize);
+ tryWorkItems = kernelWorkGroupSize;
+ continue;
+ }
- openCLExecuteKernel(clCxt, &filtering_boxFilter, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ openCLExecuteKernel(src.clCxt, kernel, gt, lt, args); // kernel will be released here
+ } while (false);
}
-
// Creates the box-filter primitive. The new signature no longer names
// srcType/dstType: the per-type dispatch table and the type assertion are
// removed, and the single GPUFilterBox callback is used for every input.
- Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
+ Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int /*srcType*/, int /*dstType*/,
const Size &ksize, Point anchor, int borderType)
{
- static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, GPUFilterBox_8u_C4R, GPUFilterBox_8u_C4R},
- {0, GPUFilterBox_32F_C1R, 0, GPUFilterBox_32F_C4R, GPUFilterBox_32F_C4R}
- };
- //Remove this check if more data types need to be supported.
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 ||
- srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
-
// 'anchor' is taken by value, so it is normalized in place before being passed on.
// normalizeAnchor is defined elsewhere; presumably it resolves a (-1,-1)
// anchor to the kernel center -- TODO confirm.
normalizeAnchor(anchor, ksize);
- return makePtr<GPUBoxFilter>(ksize, anchor,
- borderType, FilterBox_callers[(CV_MAT_DEPTH(srcType) == CV_32F)][CV_MAT_CN(srcType)]);
+ return Ptr<BaseFilter_GPU>(new GPUBoxFilter(ksize, anchor,
+ borderType, GPUFilterBox));
}
Ptr<FilterEngine_GPU> cv::ocl::createBoxFilter_GPU(int srcType, int dstType,
sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, bordertype);
}
- void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
+ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale,
+ double delta, int borderType)
{
+ CV_Assert(delta == 0);
+
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
- CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
+ CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
pq.s[3] = gcascade->pq3;
float correction = gcascade->inv_window_area;
- vector<pair<size_t, const void *> > args;
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
- args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
- args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
- args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
+ std::vector<std::pair<size_t, const void *> > args;
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&pixelstep ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&p ));
+ args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq ));
+ args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction ));
- const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+ if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
+ {
+ //setup local group size
+ localThreads[0] = 8;
+ localThreads[1] = 16;
+ localThreads[2] = 1;
+
+ //init maximal number of workgroups
+ int WGNumX = 1+(sizev[0].width /(localThreads[0]));
+ int WGNumY = 1+(sizev[0].height/(localThreads[1]));
+ int WGNumZ = loopcount;
+ int WGNum = 0; //accurate number of non -empty workgroups
+ oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
+ {
+ cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
+ openCLVerifyCall(status);
+ for(int z=0;z<WGNumZ;++z)
+ {
+ int Width = (scaleinfo[z].width_height >> 16)&0xFFFF;
+ int Height = (scaleinfo[z].width_height >> 0 )& 0xFFFF;
+ for(int y=0;y<WGNumY;++y)
+ {
+ int gy = y*localThreads[1];
+ if(gy>=(Height-cascade->orig_window_size.height))
+ continue; // no data to process
+ for(int x=0;x<WGNumX;++x)
+ {
+ int gx = x*localThreads[0];
+ if(gx>=(Width-cascade->orig_window_size.width))
+ continue; // no data to process
+
+ // save non-empty workgroup info into array
+ pWGInfo[WGNum].s[0] = scaleinfo[z].width_height;
+ pWGInfo[WGNum].s[1] = (gx << 16) | gy;
+ pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff;
+ memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float));
+ WGNum++;
+ }
+ }
+ }
+ openCLSafeCall(clEnqueueUnmapMemObject(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,pWGInfo,0,0,0));
+ pWGInfo = NULL;
+ }
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
+ // setup global sizes to have linear array of workgroups with WGNum size
+ globalThreads[0] = localThreads[0]*WGNum;
+ globalThreads[1] = localThreads[1];
+ globalThreads[2] = 1;
- openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
+ #define NODE_SIZE 12
+ // pack node info to have less memory loads
+ oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
+ {
+ cl_int status;
+ cl_int* pNodesPK = (cl_int*)clEnqueueMapBuffer(getClCommandQueue(oclNodesPK.clCxt),(cl_mem)oclNodesPK.datastart,true,CL_MAP_WRITE, 0, oclNodesPK.step, 0,0,0,&status);
+ openCLVerifyCall(status);
+ //use known local data stride to precalculate indexes
+ int DATA_SIZE_X = (localThreads[0]+cascade->orig_window_size.width);
+ // check that maximal value is less than maximal unsigned short
+ assert(DATA_SIZE_X*cascade->orig_window_size.height+cascade->orig_window_size.width < USHRT_MAX);
+ for(int i = 0;i<nodenum;++i)
+ {//process each node from classifier
+ struct NodePK
+ {
+ unsigned short slm_index[3][4];
+ float weight[3];
+ float threshold;
+ float alpha[2];
+ };
+ struct NodePK * pOut = (struct NodePK *)(pNodesPK + NODE_SIZE*i);
+ for(int k=0;k<3;++k)
+ {// calc 4 short indexes in shared local mem for each rectangle instead of 2 (x,y) pair.
+ int* p = &(node[i].p[k][0]);
+ pOut->slm_index[k][0] = (unsigned short)(p[1]*DATA_SIZE_X+p[0]);
+ pOut->slm_index[k][1] = (unsigned short)(p[1]*DATA_SIZE_X+p[2]);
+ pOut->slm_index[k][2] = (unsigned short)(p[3]*DATA_SIZE_X+p[0]);
+ pOut->slm_index[k][3] = (unsigned short)(p[3]*DATA_SIZE_X+p[2]);
+ }
+ //store used float point values for each node
+ pOut->weight[0] = node[i].weight[0];
+ pOut->weight[1] = node[i].weight[1];
+ pOut->weight[2] = node[i].weight[2];
+ pOut->threshold = node[i].threshold;
+ pOut->alpha[0] = node[i].alpha[0];
- pOut->alpha[1] = node[i].alpha[1];
++ pOut->alpha[1] = node[i].alpha[1];
+ }
+ openCLSafeCall(clEnqueueUnmapMemObject(getClCommandQueue(oclNodesPK.clCxt),(cl_mem)oclNodesPK.datastart,pNodesPK,0,0,0));
+ pNodesPK = NULL;
+ }
+ // add 2 additional buffers (WGinfo and packed nodes) as 2 last args
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&oclNodesPK.datastart ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&oclWGInfo.datastart ));
++ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&oclNodesPK.datastart ));
++ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&oclWGInfo.datastart ));
+
+ //form build options for kernel
- string options = "-D PACKED_CLASSIFIER";
- options += format(" -D NODE_SIZE=%d",NODE_SIZE);
- options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
- options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
- options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
- options += format(" -D LSx=%d",localThreads[0]);
- options += format(" -D LSy=%d",localThreads[1]);
- options += format(" -D SPLITNODE=%d",splitnode);
- options += format(" -D SPLITSTAGE=%d",splitstage);
- options += format(" -D OUTPUTSZ=%d",outputsz);
++ String options = "-D PACKED_CLASSIFIER";
++ options = options + format(" -D NODE_SIZE=%d",NODE_SIZE);
++ options = options + format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
++ options = options + format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
++ options = options + format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
++ options = options + format(" -D LSx=%d",localThreads[0]);
++ options = options + format(" -D LSy=%d",localThreads[1]);
++ options = options + format(" -D SPLITNODE=%d",splitnode);
++ options = options + format(" -D SPLITSTAGE=%d",splitstage);
++ options = options + format(" -D OUTPUTSZ=%d",outputsz);
+
+ // init candidate global count to 0
+ int pattern = 0;
+ openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
+ // execute face detector
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
+ //read candidate buffer back and put it into host list
+ openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
+ assert(candidate[0]<outputsz);
+ //printf("candidate[0]=%d\n",candidate[0]);
+ for(int i = 1; i <= candidate[0]; i++)
+ {
+ allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],candidate[4 * i + 2], candidate[4 * i + 3]));
+ }
+ }
+ else
+ {
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
- for(int i = 0; i < outputsz; i++)
- if(candidate[4 * i + 2] != 0)
- allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
- candidate[4 * i + 2], candidate[4 * i + 3]));
+ openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
+
+ for(int i = 0; i < outputsz; i++)
+ if(candidate[4 * i + 2] != 0)
+ allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
+ candidate[4 * i + 2], candidate[4 * i + 3]));
+ }
free(scaleinfo);
free(candidate);
/////////////////////////////////////////////////////////////////////////////////////
// threshold
- typedef void (*gpuThresh_t)(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type);
-
- static void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
+ static std::vector<uchar> scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn)
{
- uchar thresh_uchar = cvFloor(thresh);
- uchar max_val = cvRound(maxVal);
+ CV_Assert(ocn == cn || (ocn == 4 && cn == 3));
- size_t cols = (dst.cols + (dst.offset % 16) + 15) / 16;
- size_t bSizeX = 16, bSizeY = 16;
- size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX;
- size_t gSizeY = dst.rows;
- size_t globalThreads[3] = {gSizeX, gSizeY, 1};
- size_t localThreads[3] = {bSizeX, bSizeY, 1};
+ static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort),
+ sizeof(short), sizeof(int), sizeof(float), sizeof(double) };
- std::vector< std::pair<size_t, const void *> > args;
- args.push_back( std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back( std::make_pair(sizeof(cl_mem), &dst.data));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
- args.push_back( std::make_pair(sizeof(cl_uchar), (void *)&thresh_uchar));
- args.push_back( std::make_pair(sizeof(cl_uchar), (void *)&max_val));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&type));
- openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth());
+ int elemSize1 = sizeMap[depth];
+ int bufSize = elemSize1 * ocn;
+ std::vector<uchar> _buf(bufSize);
+ uchar * buf = &_buf[0];
+ scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn));
+ memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1);
+
+ return _buf;
}
- static void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
+ static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
{
- float thresh_f = thresh;
- float max_val = maxVal;
- int dst_offset = (dst.offset >> 2);
- int dst_step = (dst.step >> 2);
- int src_offset = (src.offset >> 2);
- int src_step = (src.step >> 2);
-
- size_t cols = (dst.cols + (dst_offset & 3) + 3) / 4;
- size_t bSizeX = 16, bSizeY = 16;
- size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX;
- size_t gSizeY = dst.rows;
- size_t globalThreads[3] = {gSizeX, gSizeY, 1};
- size_t localThreads[3] = {bSizeX, bSizeY, 1};
+ bool ival = src.depth() < CV_32F;
+ int cn = src.channels(), vecSize = 4, depth = src.depth();
+ std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(),
+ dst.oclchannels(), dst.channels());
+ std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels());
+
+ const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
+ "THRESH_TOZERO", "THRESH_TOZERO_INV" };
+ const char * const channelMap[] = { "", "", "2", "4", "4" };
+ const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
+ std::string buildOptions = format("-D T=%s%s -D %s", typeMap[depth], channelMap[cn], thresholdMap[thresholdType]);
+
+ int elemSize = src.elemSize();
+ int src_step = src.step / elemSize, src_offset = src.offset / elemSize;
+ int dst_step = dst.step / elemSize, dst_offset = dst.offset / elemSize;
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
- args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
- args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0]));
+ std::vector< std::pair<size_t, const void *> > args;
- args.push_back( std::make_pair(sizeof(cl_mem), &src.data));
- args.push_back( std::make_pair(sizeof(cl_mem), &dst.data));
++ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src_step));
++ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_offset));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst_step));
- args.push_back( std::make_pair(sizeof(cl_float), (void *)&thresh_f));
- args.push_back( std::make_pair(sizeof(cl_float), (void *)&max_val));
- args.push_back( std::make_pair(sizeof(cl_int), (void *)&type));
++ args.push_back( std::make_pair(thresholdValue.size(), (void *)&thresholdValue[0]));
++ args.push_back( std::make_pair(maxValue.size(), (void *)&maxValue[0]));
+
+ int max_index = dst.cols, cols = dst.cols;
+ if (cn == 1 && vecSize > 1)
+ {
+ CV_Assert(((vecSize - 1) & vecSize) == 0 && vecSize <= 16);
+ cols = divUp(cols, vecSize);
+ buildOptions += format(" -D VECTORIZED -D VT=%s%d -D VLOADN=vload%d -D VECSIZE=%d -D VSTOREN=vstore%d",
+ typeMap[depth], vecSize, vecSize, vecSize, vecSize);
+
+ int vecSizeBytes = vecSize * dst.elemSize1();
+ if ((dst.offset % dst.step) % vecSizeBytes == 0 && dst.step % vecSizeBytes == 0)
+ buildOptions += " -D DST_ALIGNED";
+ if ((src.offset % src.step) % vecSizeBytes == 0 && src.step % vecSizeBytes == 0)
+ buildOptions += " -D SRC_ALIGNED";
+
- args.push_back( make_pair(sizeof(cl_int), (void *)&max_index));
++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&max_index));
+ }
- openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth());
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
++ args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
+
+ size_t localThreads[3] = { 16, 16, 1 };
+ size_t globalThreads[3] = { cols, dst.rows, 1 };
+
+ openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args,
+ -1, -1, buildOptions.c_str());
}
- // threshold: support 8UC1 and 32FC1 data type and five threshold type
- double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
+ double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType)
{
- //TODO: These limitations shall be removed later.
- CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1);
- CV_Assert(type == THRESH_BINARY || type == THRESH_BINARY_INV || type == THRESH_TRUNC
- || type == THRESH_TOZERO || type == THRESH_TOZERO_INV );
+ CV_Assert(thresholdType == THRESH_BINARY || thresholdType == THRESH_BINARY_INV || thresholdType == THRESH_TRUNC
+ || thresholdType == THRESH_TOZERO || thresholdType == THRESH_TOZERO_INV);
- static const gpuThresh_t gpuThresh_callers[2] = {threshold_8u, threshold_32f};
-
- dst.create( src.size(), src.type() );
- gpuThresh_callers[(src.type() == CV_32FC1)](src, dst, thresh, maxVal, type);
+ dst.create(src.size(), src.type());
+ threshold_runner(src, dst, thresh, maxVal, thresholdType);
return thresh;
}
if (ksize > 0)
{
- Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
- Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
+ Context* clCxt = Context::getContext();
+ if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
+ src.cols % 8 == 0 && src.rows % 8 == 0 &&
+ ksize==3 &&
+ (borderType ==cv::BORDER_REFLECT ||
+ borderType == cv::BORDER_REPLICATE ||
+ borderType ==cv::BORDER_REFLECT101 ||
+ borderType ==cv::BORDER_WRAP))
+ {
+ Dx.create(src.size(), CV_32FC1);
+ Dy.create(src.size(), CV_32FC1);
+
+ const unsigned int block_x = 8;
+ const unsigned int block_y = 8;
+
+ unsigned int src_pitch = src.step;
+ unsigned int dst_pitch = Dx.cols;
+
+ float _scale = scale;
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
+ args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
+ args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
+ size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
+
- string option = "-D BLK_X=8 -D BLK_Y=8";
++ String option = "-D BLK_X=8 -D BLK_Y=8";
+ switch(borderType)
+ {
+ case cv::BORDER_REPLICATE:
- option += " -D BORDER_REPLICATE";
++ option = option + " -D BORDER_REPLICATE";
+ break;
+ case cv::BORDER_REFLECT:
- option += " -D BORDER_REFLECT";
++ option = option + " -D BORDER_REFLECT";
+ break;
+ case cv::BORDER_REFLECT101:
- option += " -D BORDER_REFLECT101";
++ option = option + " -D BORDER_REFLECT101";
+ break;
+ case cv::BORDER_WRAP:
- option += " -D BORDER_WRAP";
++ option = option + " -D BORDER_WRAP";
+ break;
+ }
+ openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
+ }
+ else
+ {
+ Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
+ Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
+ }
}
else
{
size_t gt[3] = { globalSizeX, globalSizeY, 1 };
size_t lt[3] = { blockSizeX, blockSizeY, 1 };
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&Dy.data));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dx.wholerows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dx.wholecols ));
- args.push_back( make_pair(sizeof(cl_int), (void *)&Dx.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dy.wholerows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&Dy.wholecols ));
- args.push_back( make_pair(sizeof(cl_int), (void *)&Dy.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
- args.push_back( make_pair( sizeof(cl_float) , (void *)&k));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.wholerows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.wholecols ));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&Dx.step));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.wholerows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.wholecols ));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&Dy.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( std::make_pair( sizeof(cl_float) , (void *)&k));
+
openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, buildOptions.c_str());
}
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
- CV_Error(Error::OpenCLDoubleNotSupported, "Select device doesn't support double");
- CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
++ CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
{
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
{
- CV_Error(Error::OpenCLDoubleNotSupported, "select device don't support double");
- CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
++ CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
}
}
- void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers)
+ void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType, const oclMat &indices)
{
- //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
- //{
- // CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
- // return;
- //}
-
- Context *clCxt = src.clCxt;
- int labels_step = (int)(labels.step/labels.elemSize());
+ CV_Assert(src.cols*src.oclchannels() == centers.cols*centers.oclchannels());
+ CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
+ bool is_label_row_major = false;
+ ensureSizeIsEnough(1, src.rows, CV_32FC1, dists);
+ if(labels.empty() || (!labels.empty() && labels.rows == src.rows && labels.cols == 1))
+ {
+ ensureSizeIsEnough(src.rows, 1, CV_32SC1, labels);
+ is_label_row_major = true;
+ }
+ CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
+
+ std::stringstream build_opt_ss;
+ build_opt_ss
+ << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST")
+ << (indices.empty() ? "" : " -D USE_INDEX");
+
+ String build_opt = build_opt_ss.str();
+
+ const int src_step = (int)(src.oclchannels() * src.step / src.elemSize());
+ const int centers_step = (int)(centers.oclchannels() * centers.step / centers.elemSize());
+
+ const int colsNumb = centers.cols*centers.oclchannels();
+
+ const int label_step = is_label_row_major ? (int)(labels.step / labels.elemSize()) : 1;
String kernelname = "distanceToCenters";
- int threadNum = src.rows > 256 ? 256 : src.rows;
- size_t localThreads[3] = {1, threadNum, 1};
- size_t globalThreads[3] = {1, src.rows, 1};
+
+ const int number_of_input = indices.empty() ? src.rows : indices.size().area();
+
+ const int src_offset = (int)src.offset/src.elemSize();
+ const int centers_offset = (int)centers.offset/centers.elemSize();
+
+ size_t globalThreads[3] = {number_of_input, 1, 1};
- vector<pair<size_t, const void *> > args;
- args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
+ std::vector<std::pair<size_t, const void *> > args;
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&labels_step));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.rows));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back(std::make_pair(sizeof(cl_mem), (void *)&labels.data));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.cols));
- args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&centers.data));
- args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dists.data));
+ if(!indices.empty())
+ {
- args.push_back(make_pair(sizeof(cl_mem), (void *)&indices.data));
++ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&indices.data));
+ }
- args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&dists.data));
- args.push_back(make_pair(sizeof(cl_int), (void *)&colsNumb));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
- args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
- args.push_back(make_pair(sizeof(cl_int), (void *)&label_step));
- args.push_back(make_pair(sizeof(cl_int), (void *)&number_of_input));
- args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
- args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));
++ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&labels.data));
++ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dists.data));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&colsNumb));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_step));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers_step));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&label_step));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&number_of_input));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.rows));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset));
++ args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers_offset));
- openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL);
+ openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
+ kernelname, globalThreads, NULL, args, -1, -1, build_opt.c_str());
}
///////////////////////////////////k - means /////////////////////////////////////////////////////////
double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
//
//M*/
#include "precomp.hpp"
+
+#include "opencv2/imgproc/types_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+
#include "opencl_kernels.hpp"
+ #if defined _MSC_VER
+ #define snprintf sprintf_s
+ #endif
namespace cv
{
- namespace ocl
- {
- // The function calculates center of gravity and the central second order moments
- static void icvCompleteMomentState( CvMoments* moments )
- {
- double cx = 0, cy = 0;
- double mu20, mu11, mu02;
-
- assert( moments != 0 );
- moments->inv_sqrt_m00 = 0;
-
- if( fabs(moments->m00) > DBL_EPSILON )
- {
- double inv_m00 = 1. / moments->m00;
- cx = moments->m10 * inv_m00;
- cy = moments->m01 * inv_m00;
- moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
- }
-
- // mu20 = m20 - m10*cx
- mu20 = moments->m20 - moments->m10 * cx;
- // mu11 = m11 - m10*cy
- mu11 = moments->m11 - moments->m10 * cy;
- // mu02 = m02 - m01*cy
- mu02 = moments->m02 - moments->m01 * cy;
-
- moments->mu20 = mu20;
- moments->mu11 = mu11;
- moments->mu02 = mu02;
-
- // mu30 = m30 - cx*(3*mu20 + cx*m10)
- moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
- mu11 += mu11;
- // mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
- moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
- // mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
- moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
- // mu03 = m03 - cy*(3*mu02 + cy*m01)
- moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
- }
-
-
- static void icvContourMoments( CvSeq* contour, CvMoments* mom )
- {
- if( contour->total )
+ namespace ocl
{
- CvSeqReader reader;
- int lpt = contour->total;
- double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
-
- cvStartReadSeq( contour, &reader, 0 );
+ // The function calculates center of gravity and the central second order moments
+ static void icvCompleteMomentState( CvMoments* moments )
+ {
+ double cx = 0, cy = 0;
+ double mu20, mu11, mu02;
- size_t reader_size = lpt << 1;
- cv::Mat reader_mat(1,reader_size,CV_32FC1);
+ assert( moments != 0 );
+ moments->inv_sqrt_m00 = 0;
- bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
+ if( fabs(moments->m00) > DBL_EPSILON )
+ {
+ double inv_m00 = 1. / moments->m00;
+ cx = moments->m10 * inv_m00;
+ cy = moments->m01 * inv_m00;
+ moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
+ }
- if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
- {
- CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
+ // mu20 = m20 - m10*cx
+ mu20 = moments->m20 - moments->m10 * cx;
+ // mu11 = m11 - m10*cy
+ mu11 = moments->m11 - moments->m10 * cy;
+ // mu02 = m02 - m01*cy
+ mu02 = moments->m02 - moments->m01 * cy;
+
+ moments->mu20 = mu20;
+ moments->mu11 = mu11;
+ moments->mu02 = mu02;
+
+ // mu30 = m30 - cx*(3*mu20 + cx*m10)
+ moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
+ mu11 += mu11;
+ // mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
+ moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
+ // mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
+ moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
+ // mu03 = m03 - cy*(3*mu02 + cy*m01)
+ moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
}
- if( is_float )
+
+ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
{
- for(size_t i = 0; i < reader_size; ++i)
+ if( contour->total )
{
- reader_mat.at<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
- reader_mat.at<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
- CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+ CvSeqReader reader;
+ int lpt = contour->total;
+ double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
+
+ cvStartReadSeq( contour, &reader, 0 );
+
+ size_t reader_size = lpt << 1;
+ cv::Mat reader_mat(1,reader_size,CV_32FC1);
+
+ bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
+
+ if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
+ {
+ CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
+ }
+
+ if( is_float )
+ {
+ for(size_t i = 0; i < reader_size; ++i)
+ {
+ reader_mat.at<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
+ reader_mat.at<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
+ CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+ }
+ }
+ else
+ {
+ for(size_t i = 0; i < reader_size; ++i)
+ {
+ reader_mat.at<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
+ reader_mat.at<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
+ CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+ }
+ }
+
+ cv::ocl::oclMat dst_a(10, lpt, CV_64FC1);
+ cv::ocl::oclMat reader_oclmat(reader_mat);
+ int llength = std::min(lpt,128);
+ size_t localThreads[3] = { llength, 1, 1};
+ size_t globalThreads[3] = { lpt, 1, 1};
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
++ std::vector<std::pair<size_t , const void *> > args;
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&contour->total ));
++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
+ cl_int dst_step = (cl_int)dst_a.step;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
+
+ char builOption[128];
+ snprintf(builOption, 128, "-D CV_8UC1");
+
+ openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1, builOption);
+
+ cv::Mat dst(dst_a);
+ a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
+ if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+ {
+ for (int i = 0; i < contour->total; ++i)
+ {
+ a00 += dst.at<cl_long>(0, i);
+ a10 += dst.at<cl_long>(1, i);
+ a01 += dst.at<cl_long>(2, i);
+ a20 += dst.at<cl_long>(3, i);
+ a11 += dst.at<cl_long>(4, i);
+ a02 += dst.at<cl_long>(5, i);
+ a30 += dst.at<cl_long>(6, i);
+ a21 += dst.at<cl_long>(7, i);
+ a12 += dst.at<cl_long>(8, i);
+ a03 += dst.at<cl_long>(9, i);
+ }
+ }
+ else
+ {
+ a00 = cv::sum(dst.row(0))[0];
+ a10 = cv::sum(dst.row(1))[0];
+ a01 = cv::sum(dst.row(2))[0];
+ a20 = cv::sum(dst.row(3))[0];
+ a11 = cv::sum(dst.row(4))[0];
+ a02 = cv::sum(dst.row(5))[0];
+ a30 = cv::sum(dst.row(6))[0];
+ a21 = cv::sum(dst.row(7))[0];
+ a12 = cv::sum(dst.row(8))[0];
+ a03 = cv::sum(dst.row(9))[0];
+ }
+
+ double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
+ if( fabs(a00) > FLT_EPSILON )
+ {
+ if( a00 > 0 )
+ {
+ db1_2 = 0.5;
+ db1_6 = 0.16666666666666666666666666666667;
+ db1_12 = 0.083333333333333333333333333333333;
+ db1_24 = 0.041666666666666666666666666666667;
+ db1_20 = 0.05;
+ db1_60 = 0.016666666666666666666666666666667;
+ }
+ else
+ {
+ db1_2 = -0.5;
+ db1_6 = -0.16666666666666666666666666666667;
+ db1_12 = -0.083333333333333333333333333333333;
+ db1_24 = -0.041666666666666666666666666666667;
+ db1_20 = -0.05;
+ db1_60 = -0.016666666666666666666666666666667;
+ }
+
+ // spatial moments
+ mom->m00 = a00 * db1_2;
+ mom->m10 = a10 * db1_6;
+ mom->m01 = a01 * db1_6;
+ mom->m20 = a20 * db1_12;
+ mom->m11 = a11 * db1_24;
+ mom->m02 = a02 * db1_12;
+ mom->m30 = a30 * db1_20;
+ mom->m21 = a21 * db1_60;
+ mom->m12 = a12 * db1_60;
+ mom->m03 = a03 * db1_20;
+
+ icvCompleteMomentState( mom );
+ }
}
}
- else
+
+ Moments ocl_moments(oclMat& src, bool binary) //for image
{
- for(size_t i = 0; i < reader_size; ++i)
+ CV_Assert(src.oclchannels() == 1);
+ if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
- reader_mat.at<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
- reader_mat.at<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
- CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
+ CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
}
- }
- cv::ocl::oclMat dst_a(10, lpt, CV_64FC1);
- cv::ocl::oclMat reader_oclmat(reader_mat);
- int llength = std::min(lpt,128);
- size_t localThreads[3] = { llength, 1, 1};
- size_t globalThreads[3] = { lpt, 1, 1};
- std::vector<std::pair<size_t , const void *> > args;
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&contour->total ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
- cl_int dst_step = (cl_int)dst_a.step;
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
-
- openCLExecuteKernel2(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
-
- cv::Mat dst(dst_a);
- a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
- if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
- {
- for (int i = 0; i < contour->total; ++i)
+ if(binary)
{
- a00 += dst.at<cl_long>(0, i);
- a10 += dst.at<cl_long>(1, i);
- a01 += dst.at<cl_long>(2, i);
- a20 += dst.at<cl_long>(3, i);
- a11 += dst.at<cl_long>(4, i);
- a02 += dst.at<cl_long>(5, i);
- a30 += dst.at<cl_long>(6, i);
- a21 += dst.at<cl_long>(7, i);
- a12 += dst.at<cl_long>(8, i);
- a03 += dst.at<cl_long>(9, i);
+ oclMat mask;
+ if(src.type() != CV_8UC1)
+ {
+ src.convertTo(mask, CV_8UC1);
+ }
+ oclMat src8u(src.size(), CV_8UC1);
+ src8u.setTo(Scalar(255), mask);
+ src = src8u;
}
- }
- else
- {
- a00 = cv::sum(dst.row(0))[0];
- a10 = cv::sum(dst.row(1))[0];
- a01 = cv::sum(dst.row(2))[0];
- a20 = cv::sum(dst.row(3))[0];
- a11 = cv::sum(dst.row(4))[0];
- a02 = cv::sum(dst.row(5))[0];
- a30 = cv::sum(dst.row(6))[0];
- a21 = cv::sum(dst.row(7))[0];
- a12 = cv::sum(dst.row(8))[0];
- a03 = cv::sum(dst.row(9))[0];
- }
+ const int TILE_SIZE = 256;
- double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
- if( fabs(a00) > FLT_EPSILON )
- {
- if( a00 > 0 )
+ CvMoments mom;
+ memset(&mom, 0, sizeof(mom));
+
+ cv::Size size = src.size();
+ int blockx, blocky;
+ blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
+ blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;
+
+ oclMat dst_m;
+ int tile_height = TILE_SIZE;
+
+ size_t localThreads[3] = {1, tile_height, 1};
+ size_t globalThreads[3] = {blockx, size.height, 1};
+
+ if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
{
- db1_2 = 0.5;
- db1_6 = 0.16666666666666666666666666666667;
- db1_12 = 0.083333333333333333333333333333333;
- db1_24 = 0.041666666666666666666666666666667;
- db1_20 = 0.05;
- db1_60 = 0.016666666666666666666666666666667;
+ dst_m.create(blocky * 10, blockx, CV_64FC1);
+ }else
+ {
+ dst_m.create(blocky * 10, blockx, CV_32FC1);
}
- vector<pair<size_t , const void *> > args,args_sum;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dstm_step ));
+
+ int src_step = (int)(src.step/src.elemSize());
+ int dstm_step = (int)(dst_m.step/dst_m.elemSize());
+
++ std::vector<std::pair<size_t , const void *> > args,args_sum;
++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));
+
+ int binary_;
+ if(binary)
+ binary_ = 1;
else
- args.push_back( make_pair( sizeof(cl_int) , (void *)&binary_));
+ binary_ = 0;
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));
+
+ char builOption[128];
+ if(binary || src.type() == CV_8UC1)
+ {
+ snprintf(builOption, 128, "-D CV_8UC1");
+ }else if(src.type() == CV_16UC1)
{
- db1_2 = -0.5;
- db1_6 = -0.16666666666666666666666666666667;
- db1_12 = -0.083333333333333333333333333333333;
- db1_24 = -0.041666666666666666666666666666667;
- db1_20 = -0.05;
- db1_60 = -0.016666666666666666666666666666667;
+ snprintf(builOption, 128, "-D CV_16UC1");
+ }else if(src.type() == CV_16SC1)
+ {
+ snprintf(builOption, 128, "-D CV_16SC1");
+ }else if(src.type() == CV_32FC1)
+ {
+ snprintf(builOption, 128, "-D CV_32FC1");
+ }else if(src.type() == CV_64FC1)
+ {
+ snprintf(builOption, 128, "-D CV_64FC1");
+ }else
+ {
+ CV_Error( CV_StsUnsupportedFormat, "" );
}
- // spatial moments
- mom->m00 = a00 * db1_2;
- mom->m10 = a10 * db1_6;
- mom->m01 = a01 * db1_6;
- mom->m20 = a20 * db1_12;
- mom->m11 = a11 * db1_24;
- mom->m02 = a02 * db1_12;
- mom->m30 = a30 * db1_20;
- mom->m21 = a21 * db1_60;
- mom->m12 = a12 * db1_60;
- mom->m03 = a03 * db1_20;
-
- icvCompleteMomentState( mom );
- }
- }
- }
-
- static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
- {
- const int TILE_SIZE = 256;
- int type, depth, cn, coi = 0;
- CvMat stub, *mat = (CvMat*)array;
- CvContour contourHeader;
- CvSeq* contour = 0;
- CvSeqBlock block;
- if( CV_IS_SEQ( array ))
- {
- contour = (CvSeq*)array;
- if( !CV_IS_SEQ_POINT_SET( contour ))
- CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
- }
+ openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);
- if( !mom )
- CV_Error( CV_StsNullPtr, "" );
+ Mat tmp(dst_m);
+ tmp.convertTo(tmp, CV_64FC1);
- memset( mom, 0, sizeof(*mom));
+ double tmp_m[10] = {0};
- if( !contour )
- {
+ for(int j = 0; j < tmp.rows; j += 10)
+ {
+ for(int i = 0; i < tmp.cols; i++)
+ {
+ tmp_m[0] += tmp.at<double>(j, i);
+ tmp_m[1] += tmp.at<double>(j + 1, i);
+ tmp_m[2] += tmp.at<double>(j + 2, i);
+ tmp_m[3] += tmp.at<double>(j + 3, i);
+ tmp_m[4] += tmp.at<double>(j + 4, i);
+ tmp_m[5] += tmp.at<double>(j + 5, i);
+ tmp_m[6] += tmp.at<double>(j + 6, i);
+ tmp_m[7] += tmp.at<double>(j + 7, i);
+ tmp_m[8] += tmp.at<double>(j + 8, i);
+ tmp_m[9] += tmp.at<double>(j + 9, i);
+ }
+ }
- mat = cvGetMat( mat, &stub, &coi );
- type = CV_MAT_TYPE( mat->type );
+ mom.m00 = tmp_m[0];
+ mom.m10 = tmp_m[1];
+ mom.m01 = tmp_m[2];
+ mom.m20 = tmp_m[3];
+ mom.m11 = tmp_m[4];
+ mom.m02 = tmp_m[5];
+ mom.m30 = tmp_m[6];
+ mom.m21 = tmp_m[7];
+ mom.m12 = tmp_m[8];
+ mom.m03 = tmp_m[9];
+ icvCompleteMomentState( &mom );
+ return mom;
+ }
- if( type == CV_32SC2 || type == CV_32FC2 )
+ Moments ocl_moments(InputArray _contour) //for contour
{
- contour = cvPointSeqFromMat(
- CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED,
- mat, &contourHeader, &block );
- }
- }
- if( contour )
- {
- icvContourMoments( contour, mom );
- return;
- }
+ CvMoments mom;
+ memset(&mom, 0, sizeof(mom));
- type = CV_MAT_TYPE( mat->type );
- depth = CV_MAT_DEPTH( type );
- cn = CV_MAT_CN( type );
-
- cv::Size size = cvGetMatSize( mat );
- if( cn > 1 && coi == 0 )
- CV_Error( CV_StsBadArg, "Invalid image type" );
-
- if( size.width <= 0 || size.height <= 0 )
- return;
-
- cv::Mat src0 = cv::cvarrToMat(mat);
- cv::ocl::oclMat src(src0);
- cv::Size tileSize;
- int blockx,blocky;
- if(size.width%TILE_SIZE == 0)
- blockx = size.width/TILE_SIZE;
- else
- blockx = size.width/TILE_SIZE + 1;
- if(size.height%TILE_SIZE == 0)
- blocky = size.height/TILE_SIZE;
- else
- blocky = size.height/TILE_SIZE + 1;
- oclMat dst_m(blocky * 10, blockx, CV_64FC1);
- oclMat sum(1, 10, CV_64FC1);
- int tile_width = std::min(size.width,TILE_SIZE);
- int tile_height = std::min(size.height,TILE_SIZE);
- size_t localThreads[3] = { tile_height, 1, 1};
- size_t globalThreads[3] = { size.height, blockx, 1};
- std::vector<std::pair<size_t , const void *> > args,args_sum;
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&blocky ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&depth ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cn ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&coi ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
- openCLExecuteKernel2(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
-
- size_t localThreadss[3] = { 128, 1, 1};
- size_t globalThreadss[3] = { 128, 1, 1};
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&tile_height ));
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&tile_width ));
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
- args_sum.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args_sum.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
- args_sum.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
- openCLExecuteKernel2(Context::getContext(), &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
-
- Mat dstsum(sum);
- mom->m00 = dstsum.at<double>(0, 0);
- mom->m10 = dstsum.at<double>(0, 1);
- mom->m01 = dstsum.at<double>(0, 2);
- mom->m20 = dstsum.at<double>(0, 3);
- mom->m11 = dstsum.at<double>(0, 4);
- mom->m02 = dstsum.at<double>(0, 5);
- mom->m30 = dstsum.at<double>(0, 6);
- mom->m21 = dstsum.at<double>(0, 7);
- mom->m12 = dstsum.at<double>(0, 8);
- mom->m03 = dstsum.at<double>(0, 9);
-
- icvCompleteMomentState( mom );
- }
+ Mat arr = _contour.getMat();
+ CvMat c_array = arr;
+ const void* array = &c_array;
- Moments ocl_moments( InputArray _array, bool binaryImage )
- {
- CvMoments om;
- Mat arr = _array.getMat();
- CvMat c_array = arr;
- ocl_cvMoments(&c_array, &om, binaryImage);
- return om;
- }
+ CvSeq* contour = 0;
+ if( CV_IS_SEQ( array ))
+ {
+ contour = (CvSeq*)(array);
+ if( !CV_IS_SEQ_POINT_SET( contour ))
+ CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
+ }
- }
+ int type, coi = 0;
+
+ CvMat stub, *mat = (CvMat*)(array);
+ CvContour contourHeader;
+ CvSeqBlock block;
+
+ if( !contour )
+ {
+ mat = cvGetMat( mat, &stub, &coi );
+ type = CV_MAT_TYPE( mat->type );
+
+ if( type == CV_32SC2 || type == CV_32FC2 )
+ {
+ contour = cvPointSeqFromMat(
+ CV_SEQ_KIND_CURVE | CV_SEQ_FLAG_CLOSED,
+ mat, &contourHeader, &block );
+ }
+ }
+
+ CV_Assert(contour);
-}
+ icvContourMoments(contour, &mom);
+ return mom;
+ }
+ }
+}
static inline void ___openCLSafeCall(int err, const char *file, const int line, const char *func = "")
{
- if( CL_SUCCESS != err)
+ if (CL_SUCCESS != err)
- cv::ocl::error(getOpenCLErrorString(err), file, line, func);
+ cv::error(Error::OpenCLApiCallError, getOpenCLErrorString(err), func, file, line);
}
}
}
mat_dst.create(size, CV_MAKETYPE(depth, total_channels));
merge_vector_run(mat_src, n, mat_dst);
}
- static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
+ static void split_vector_run(const oclMat &src, oclMat *dst)
{
- if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
{
- CV_Error(CV_OpenCLDoubleNotSupported, "Selected device doesn't support double");
+ CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
return;
}
- Context *clCxt = mat_src.clCxt;
- int channels = mat_src.oclchannels();
- int depth = mat_src.depth();
+ Context *clCtx = src.clCxt;
+ int channels = src.channels();
+ int depth = src.depth();
+ depth = (depth == CV_8S) ? CV_8U : depth;
+ depth = (depth == CV_16S) ? CV_16U : depth;
- string kernelName = "split_vector";
+ String kernelName = "split_vector";
- int vector_lengths[4][7] = {{0, 0, 0, 0, 0, 0, 0},
- {4, 4, 2, 2, 1, 1, 1},
- {4, 4, 2, 2 , 1, 1, 1},
- {4, 4, 2, 2, 1, 1, 1}
- };
-
- size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()];
-
- int max_offset_cols = 0;
- for(int i = 0; i < channels; i++)
- {
- int offset_cols = (mat_dst[i].offset / mat_dst[i].elemSize()) & (vector_length - 1);
- if(max_offset_cols < offset_cols)
- max_offset_cols = offset_cols;
- }
-
- int cols = vector_length == 1 ? divUp(mat_src.cols, vector_length)
- : divUp(mat_src.cols + max_offset_cols, vector_length);
-
- size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { cols, mat_src.rows, 1 };
+ size_t VEC_SIZE = 4;
- int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
+ std::vector<std::pair<size_t , const void *> > args;
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.offset));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[0].offset));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[1].offset));
- if(channels >= 3)
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step));
+ int srcOffsetXBytes = src.offset % src.step;
+ int srcOffsetY = src.offset / src.step;
+ cl_int2 srcOffset = {{srcOffsetXBytes, srcOffsetY}};
- args.push_back( make_pair( sizeof(cl_int2), (void *)&srcOffset));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&srcOffset));
+
+ bool dst0Aligned = false, dst1Aligned = false, dst2Aligned = false, dst3Aligned = false;
+ int alignSize = dst[0].elemSize1() * VEC_SIZE;
+ int alignMask = alignSize - 1;
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst[0].data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst[0].step));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[0].data));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[0].step));
+ int dst0OffsetXBytes = dst[0].offset % dst[0].step;
+ int dst0OffsetY = dst[0].offset / dst[0].step;
+ cl_int2 dst0Offset = {{dst0OffsetXBytes, dst0OffsetY}};
- args.push_back( make_pair( sizeof(cl_int2), (void *)&dst0Offset));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst0Offset));
+ if ((dst0OffsetXBytes & alignMask) == 0)
+ dst0Aligned = true;
+
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst[1].data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst[1].step));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[1].data));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[1].step));
+ int dst1OffsetXBytes = dst[1].offset % dst[1].step;
+ int dst1OffsetY = dst[1].offset / dst[1].step;
+ cl_int2 dst1Offset = {{dst1OffsetXBytes, dst1OffsetY}};
- args.push_back( make_pair( sizeof(cl_int2), (void *)&dst1Offset));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst1Offset));
+ if ((dst1OffsetXBytes & alignMask) == 0)
+ dst1Aligned = true;
+
+ // DON'T MOVE VARIABLES INTO 'IF' BODY
+ int dst2OffsetXBytes, dst2OffsetY;
+ cl_int2 dst2Offset;
+ int dst3OffsetXBytes, dst3OffsetY;
+ cl_int2 dst3Offset;
+ if (channels >= 3)
{
-
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[2].offset));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst[2].data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst[2].step));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[2].data));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[2].step));
+ dst2OffsetXBytes = dst[2].offset % dst[2].step;
+ dst2OffsetY = dst[2].offset / dst[2].step;
+ dst2Offset.s[0] = dst2OffsetXBytes; dst2Offset.s[1] = dst2OffsetY;
- args.push_back( make_pair( sizeof(cl_int2), (void *)&dst2Offset));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst2Offset));
+ if ((dst2OffsetXBytes & alignMask) == 0)
+ dst2Aligned = true;
}
- if(channels >= 4)
+
+ if (channels >= 4)
{
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[3].offset));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst[3].data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst[3].step));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[3].data));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[3].step));
+ dst3OffsetXBytes = dst[3].offset % dst[3].step;
+ dst3OffsetY = dst[3].offset / dst[3].step;
+ dst3Offset.s[0] = dst3OffsetXBytes; dst3Offset.s[1] = dst3OffsetY;
- args.push_back( make_pair( sizeof(cl_int2), (void *)&dst3Offset));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst3Offset));
+ if ((dst3OffsetXBytes & alignMask) == 0)
+ dst3Aligned = true;
}
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.rows));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1));
-
- openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
+ cl_int2 size = {{ src.cols, src.rows }};
- args.push_back( make_pair( sizeof(cl_int2), (void *)&size));
++ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&size));
+
- string build_options =
++ String build_options =
+ cv::format("-D VEC_SIZE=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d",
+ (int)VEC_SIZE, depth, channels);
+
+ if (dst0Aligned)
- build_options += " -D DST0_ALIGNED";
++ build_options = build_options + " -D DST0_ALIGNED";
+ if (dst1Aligned)
- build_options += " -D DST1_ALIGNED";
++ build_options = build_options + " -D DST1_ALIGNED";
+ if (dst2Aligned)
- build_options += " -D DST2_ALIGNED";
++ build_options = build_options + " -D DST2_ALIGNED";
+ if (dst3Aligned)
- build_options += " -D DST3_ALIGNED";
++ build_options = build_options + " -D DST3_ALIGNED";
+
+ const DeviceInfo& devInfo = clCtx->getDeviceInfo();
+
+ // TODO Workaround for issues. Need to investigate a problem.
+ if (channels == 2
+ && devInfo.deviceType == CVCL_DEVICE_TYPE_CPU
+ && devInfo.platform->platformVendor.find("Intel") != std::string::npos
+ && (devInfo.deviceVersion.find("Build 56860") != std::string::npos
+ || devInfo.deviceVersion.find("Build 76921") != std::string::npos))
- build_options += " -D BYPASS_VSTORE=true";
++ build_options = build_options + " -D BYPASS_VSTORE=true";
+
+ size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 };
+ openCLExecuteKernel(clCtx, &split_mat, kernelName, globalThreads, NULL, args, -1, -1, build_options.c_str());
}
static void split(const oclMat &mat_src, oclMat *mat_dst)
{
{
split_merge::split(src, dst);
}
-void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
+void cv::ocl::split(const oclMat &src, std::vector<oclMat> &dst)
{
- dst.resize(src.oclchannels());
+ dst.resize(src.oclchannels()); // TODO Why oclchannels?
if(src.oclchannels() > 0)
split_merge::split(src, &dst[0]);
}
for(int j = 0; j < LOOP_TIMES; j++)
{
kmeans(src, K, labels,
- TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0),
+ TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
1, flags, centers);
-
ocl::kmeans(d_src, K, d_labels,
- TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, 0),
+ TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
1, flags, d_centers);
-
Mat dd_labels(d_labels);
Mat dd_centers(d_centers);
if(flags & KMEANS_USE_INITIAL_LABELS)
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
- PARAM_TEST_CASE(MomentsTest, MatType, bool)
-using namespace std;
-
+ PARAM_TEST_CASE(MomentsTest, MatType, bool, bool)
{
int type;
- cv::Mat mat1;
+ cv::Mat mat;
bool test_contours;
-
+ bool binaryImage;
virtual void SetUp()
{
type = GET_PARAM(0);
test_contours = GET_PARAM(1);
- cv::Size size(10*MWIDTH, 10*MHEIGHT);
- mat1 = randomMat(size, type, 5, 16, false);
+ cv::Size size(10 * MWIDTH, 10 * MHEIGHT);
+ mat = randomMat(size, type, 0, 256, false);
+ binaryImage = GET_PARAM(2);
}
- void Compare(Moments& cpu, Moments& gpu)
+ void Compare(Moments& cpu_moments, Moments& gpu_moments)
{
Mat gpu_dst, cpu_dst;
- HuMoments(cpu, cpu_dst);
- HuMoments(gpu, gpu_dst);
- EXPECT_MAT_NEAR(gpu_dst,cpu_dst, 1e-3);
+ HuMoments(cpu_moments, cpu_dst);
+ HuMoments(gpu_moments, gpu_dst);
- EXPECT_MAT_NEAR(gpu_dst, cpu_dst, .5);
++ EXPECT_MAT_NEAR(gpu_dst, cpu_dst, 1e-3);
}
-
};
-
OCL_TEST_P(MomentsTest, Mat)
{
- bool binaryImage = 0;
-
+ oclMat src_d(mat);
for(int j = 0; j < LOOP_TIMES; j++)
{
if(test_contours)
}
}
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MomentsTest, Combine(
- Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_64FC1), Values(true,false)));
+ Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1, CV_64FC1), Values(false, true), Values(false, true)));
+
#endif // HAVE_OPENCL
#include "perf_precomp.hpp"
- #ifdef HAVE_OPENCL
+ #ifdef HAVE_OPENCV_OCL
-#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ocl.hpp"
using namespace std;
using namespace testing;
using namespace perf;
if(HAVE_opencv_nonfree)
target_link_libraries(${the_target} opencv_nonfree)
endif()
+ if(HAVE_opencv_cudacodec)
+ target_link_libraries(${the_target} opencv_cudacodec)
+ endif()
- if(HAVE_OPENCL)
+ if(HAVE_opencv_ocl)
target_link_libraries(${the_target} opencv_ocl)
endif()