add_definitions(-DJAS_WIN_MSVC_BUILD)
endif(WIN32 AND NOT MINGW)
- ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized
- -Wmissing-prototypes -Wmissing-declarations -Wunused -Wshadow
+ ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes
- -Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
++ -Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow
+ -Wsign-compare -Wstrict-overflow)
# Pass -Wunused-parameter to the project helper ocv_warnings_disable for the C flags
# (presumably this suppresses that warning for the third-party sources here - confirm against the helper).
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
# Same helper, MSVC-style /wd<number> switches.
ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005
# Install the library archive only when the build does not produce shared libraries.
if(NOT BUILD_SHARED_LIBS)
install(TARGETS ${JASPER_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main)
endif()
--
.. [Hartley99] Hartley, R.I., Theory and Practice of Projective Rectification. IJCV 35 2, pp 115-127 (1999)
+.. [HartleyZ00] Hartley, R. and Zisserman, A. Multiple View Geometry in Computer Vision, Cambridge University Press, 2000.
+
.. [HH08] Hirschmuller, H. Stereo Processing by Semiglobal Matching and Mutual Information, PAMI(30), No. 2, February 2008, pp. 328-341.
- .. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://gregslabaugh.name/publications/euler.pdf
+.. [Nister03] Nistér, D. An efficient solution to the five-point relative pose problem, CVPR 2003.
+
+.. [SteweniusCFS] Stewénius, H., Calibrated Fivepoint solver. http://www.vis.uky.edu/~stewe/FIVEPOINT/
+
+ .. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://www.soi.city.ac.uk/~sbbh653/publications/euler.pdf (verified: 2013-04-15)
.. [Zhang2000] Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
src/window.cpp
)
-file(GLOB highgui_ext_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
+file(GLOB highgui_ext_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
- if(HAVE_QT)
+ if(HAVE_QT5)
+ set(CMAKE_AUTOMOC ON)
+ set(CMAKE_INCLUDE_CURRENT_DIR ON)
+
+ QT5_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
+ list(APPEND highgui_srcs src/window_QT.cpp src/window_QT.h ${_RCC_OUTFILES})
+
+ foreach(dt5_dep Core Gui Widgets Test Concurrent) # one pass per listed Qt5 component
+ add_definitions(${Qt5${dt5_dep}_DEFINITIONS}) # nested expansion: Qt5Core_DEFINITIONS, Qt5Gui_DEFINITIONS, ...
+ include_directories(${Qt5${dt5_dep}_INCLUDE_DIRS}) # directory-scoped include path for the component
+ list(APPEND HIGHGUI_LIBRARIES ${Qt5${dt5_dep}_LIBRARIES}) # accumulate the component's libraries in HIGHGUI_LIBRARIES
+ endforeach()
+
+ if(HAVE_QT_OPENGL) # same three steps as the component loop, applied to Qt5OpenGL only when HAVE_QT_OPENGL is set
+ add_definitions(${Qt5OpenGL_DEFINITIONS})
+ include_directories(${Qt5OpenGL_INCLUDE_DIRS})
+ list(APPEND HIGHGUI_LIBRARIES ${Qt5OpenGL_LIBRARIES})
+ endif()
+
+ elseif(HAVE_QT)
if (HAVE_QT_OPENGL)
set(QT_USE_QTOPENGL TRUE)
endif()
template<typename _Tp>
static void
-icvFloodFill_CnIR( uchar* pImage, int step, CvSize roi, CvPoint seed,
- _Tp newVal, CvConnectedComp* region, int flags,
- std::vector<CvFFillSegment>* buffer )
+floodFill_CnIR( Mat& image, Point seed,
+ _Tp newVal, ConnectedComp* region, int flags,
+ std::vector<FFillSegment>* buffer )
{
- typedef typename DataType<_Tp>::channel_type _CTp;
- _Tp* img = (_Tp*)(pImage + step * seed.y);
+ _Tp* img = (_Tp*)(image.data + image.step * seed.y);
+ Size roi = image.size();
int i, L, R;
int area = 0;
int XMin, XMax, YMin = seed.y, YMax = seed.y;
};
typedef DiffC1<int> Diff32sC1;
-typedef DiffC3<cv::Vec3i> Diff32sC3;
+typedef DiffC3<Vec3i> Diff32sC3;
typedef DiffC1<float> Diff32fC1;
-typedef DiffC3<cv::Vec3f> Diff32fC3;
-
-static cv::Vec3i& operator += (cv::Vec3i& a, const cv::Vec3b& b)
-{
- a[0] += b[0];
- a[1] += b[1];
- a[2] += b[2];
- return a;
-}
+typedef DiffC3<Vec3f> Diff32fC3;
-template<typename _Tp, typename _WTp, class Diff>
+template<typename _Tp, typename _MTp, typename _WTp, class Diff>
static void
-icvFloodFillGrad_CnIR( uchar* pImage, int step, uchar* pMask, int maskStep,
- CvSize /*roi*/, CvPoint seed, _Tp newVal, Diff diff,
- CvConnectedComp* region, int flags,
- std::vector<CvFFillSegment>* buffer )
+floodFillGrad_CnIR( Mat& image, Mat& msk,
+ Point seed, _Tp newVal, _MTp newMaskVal,
+ Diff diff, ConnectedComp* region, int flags,
+ std::vector<FFillSegment>* buffer )
{
- typedef typename DataType<_Tp>::channel_type _CTp;
+ int step = (int)image.step, maskStep = (int)msk.step;
+ uchar* pImage = image.data;
_Tp* img = (_Tp*)(pImage + step*seed.y);
- uchar* mask = (pMask += maskStep + 1) + maskStep*seed.y;
+ uchar* pMask = msk.data + maskStep + sizeof(_MTp);
+ _MTp* mask = (_MTp*)(pMask + maskStep*seed.y);
int i, L, R;
int area = 0;
- _WTp sum = _WTp((typename cv::DataType<_Tp>::channel_type)0);
int XMin, XMax, YMin = seed.y, YMax = seed.y;
int _8_connectivity = (flags & 255) == 8;
- int fixedRange = flags & CV_FLOODFILL_FIXED_RANGE;
- int fillImage = (flags & CV_FLOODFILL_MASK_ONLY) == 0;
- uchar newMaskVal = (uchar)(flags & 0xff00 ? flags >> 8 : 1);
- CvFFillSegment* buffer_end = &buffer->front() + buffer->size(), *head = &buffer->front(), *tail = &buffer->front();
+ int fixedRange = flags & FLOODFILL_FIXED_RANGE;
+ int fillImage = (flags & FLOODFILL_MASK_ONLY) == 0;
+ FFillSegment* buffer_end = &buffer->front() + buffer->size(), *head = &buffer->front(), *tail = &buffer->front();
L = R = seed.x;
if( mask[L] )
else
CV_Error( CV_StsUnsupportedFormat, "" );
+ // The mask fill value is taken from bits 8..15 of flags; when those bits are all zero
+ // the default value 1 is used. Only those bits are tested: flags is also read for
+ // FLOODFILL_FIXED_RANGE / FLOODFILL_MASK_ONLY, and testing (flags & ~0xff) would let
+ // such an option bit select the "explicit value" branch and yield a fill value of 0.
+ uchar newMaskVal = (uchar)((flags & 0xff00) == 0 ? 1 : ((flags >> 8) & 255));
+
if( type == CV_8UC1 )
- icvFloodFillGrad_CnIR<uchar, int, Diff8uC1>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, nv_buf.b[0],
- Diff8uC1(ld_buf.b[0], ud_buf.b[0]),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<uchar, uchar, int, Diff8uC1>(
+ img, mask, seedPoint, nv_buf.b[0], newMaskVal,
+ Diff8uC1(ld_buf.b[0], ud_buf.b[0]),
+ &comp, flags, &buffer);
else if( type == CV_8UC3 )
- icvFloodFillGrad_CnIR<cv::Vec3b, cv::Vec3i, Diff8uC3>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, cv::Vec3b(nv_buf.b),
- Diff8uC3(ld_buf.b, ud_buf.b),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<Vec3b, uchar, Vec3i, Diff8uC3>(
+ img, mask, seedPoint, Vec3b(nv_buf.b), newMaskVal,
+ Diff8uC3(ld_buf.b, ud_buf.b),
+ &comp, flags, &buffer);
else if( type == CV_32SC1 )
- icvFloodFillGrad_CnIR<int, int, Diff32sC1>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, nv_buf.i[0],
- Diff32sC1(ld_buf.i[0], ud_buf.i[0]),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<int, uchar, int, Diff32sC1>(
+ img, mask, seedPoint, nv_buf.i[0], newMaskVal,
+ Diff32sC1(ld_buf.i[0], ud_buf.i[0]),
+ &comp, flags, &buffer);
else if( type == CV_32SC3 )
- icvFloodFillGrad_CnIR<cv::Vec3i, cv::Vec3i, Diff32sC3>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, cv::Vec3i(nv_buf.i),
- Diff32sC3(ld_buf.i, ud_buf.i),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<Vec3i, uchar, Vec3i, Diff32sC3>(
+ img, mask, seedPoint, Vec3i(nv_buf.i), newMaskVal,
+ Diff32sC3(ld_buf.i, ud_buf.i),
+ &comp, flags, &buffer);
else if( type == CV_32FC1 )
- icvFloodFillGrad_CnIR<float, float, Diff32fC1>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, nv_buf.f[0],
- Diff32fC1(ld_buf.f[0], ud_buf.f[0]),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<float, uchar, float, Diff32fC1>(
+ img, mask, seedPoint, nv_buf.f[0], newMaskVal,
+ Diff32fC1(ld_buf.f[0], ud_buf.f[0]),
+ &comp, flags, &buffer);
else if( type == CV_32FC3 )
- icvFloodFillGrad_CnIR<cv::Vec3f, cv::Vec3f, Diff32fC3>(
- img->data.ptr, img->step, mask->data.ptr, mask->step,
- size, seed_point, cv::Vec3f(nv_buf.f),
- Diff32fC3(ld_buf.f, ud_buf.f),
- comp, flags, &buffer);
+ floodFillGrad_CnIR<Vec3f, uchar, Vec3f, Diff32fC3>(
+ img, mask, seedPoint, Vec3f(nv_buf.f), newMaskVal,
+ Diff32fC3(ld_buf.f, ud_buf.f),
+ &comp, flags, &buffer);
else
CV_Error(CV_StsUnsupportedFormat, "");
-
++
+ if( rect )
+ *rect = comp.rect;
+ return comp.area;
}
--- /dev/null
- enum {CL_DOUBLE, CL_UNIFIED_MEM};
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OCL_HPP__
+#define __OPENCV_OCL_HPP__
+
+#include <memory>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/objdetect.hpp"
+
+namespace cv
+{
+ namespace ocl
+ {
+ enum // device-type selectors; getDevice() defaults its devicetype argument to CVCL_DEVICE_TYPE_GPU
+ {
+ CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
+ CVCL_DEVICE_TYPE_CPU = (1 << 1),
+ CVCL_DEVICE_TYPE_GPU = (1 << 2),
+ CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
+ CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF // all 32 bits set
+ };
+
+ enum DevMemRW // read/write kind taken by getDevMemType(), setDevMemType() and oclMat::createEx()
+ {
+ DEVICE_MEM_R_W = 0, // default rw_type argument of setDevMemType()
+ DEVICE_MEM_R_ONLY,
+ DEVICE_MEM_W_ONLY
+ };
+
+ enum DevMemType // memory kind taken by getDevMemType(), setDevMemType() and oclMat::createEx()
+ {
+ DEVICE_MEM_DEFAULT = 0, // default mem_type argument of setDevMemType()
+ DEVICE_MEM_AHP, // alloc host pointer
+ DEVICE_MEM_UHP, // use host pointer
+ DEVICE_MEM_CHP, // copy host pointer
+ DEVICE_MEM_PM // persistent memory
+ };
+
+ //Get the global device memory and read/write type
+ //return 1 if unified memory system supported, otherwise return 0
+ CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
+
+ //Set the global device memory and read/write type,
+ //the newly generated oclMat will all use this type
+ //return -1 if the target type is unsupported, otherwise return 0
+ CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
+
+ //! Holds ocl runtime information: a platform name plus the names of its devices (see the getDevice() notes in this header).
+ class CV_EXPORTS Info
+ {
+ public:
+ struct Impl; // only forward-declared here; the definition is not part of this header section
+ Impl *impl; // raw pointer to the Impl object
+
+ Info();
+ Info(const Info &m);
+ ~Info();
+ void release();
+ Info &operator = (const Info &m);
+ std::vector<String> DeviceName; // device names; setDevice() takes an index into this vector
+ String PlatformName;
+ };
+ //////////////////////////////// Initialization & Info ////////////////////////
+ //this function may be obsoleted
+ //CV_EXPORTS cl_device_id getDevice();
+ //the function must be called before any other cv::ocl::functions; it initializes the ocl runtime
+ //each Info relates to an OpenCL platform
+ //there is one or more devices in each platform, each one has a separate name
+ CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
+
+ //sets the device you want to use; optional, call it after getDevice has been called
+ //devnum is the index of the selected device in the DeviceName vector of Info
+ CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
+
+ //optional function, if you want save opencl binary kernel to the file, set its path
+ CV_EXPORTS void setBinpath(const char *path);
+
+ //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+ CV_EXPORTS void* getoclContext();
+
+ CV_EXPORTS void* getoclCommandQueue();
+
+ //explicit call clFinish. The global command queue will be used.
+ CV_EXPORTS void finish();
+
+ //this function enable ocl module to use customized cl_context and cl_command_queue
+ //getDevice also need to be called before this function
+ CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
+
+ //////////////////////////////// OpenCL context ////////////////////////
+ //! Global singleton class used to represent an OpenCL context; obtain the instance through getContext().
+ class CV_EXPORTS Context
+ {
+ protected:
+ Context(); // protected constructor: not directly constructible by outside code
+ friend class std::auto_ptr<Context>;
+
+ private:
+ static std::auto_ptr<Context> clCxt; // presumably the holder of the shared instance - confirm in the implementation
+ static int val;
+ public:
+ ~Context();
+ void release();
+ Info::Impl* impl; // same Impl type that Info points to
+
+ static Context *getContext();
+ static void setContext(Info &oclinfo);
+
- void createEx(int rows, int cols, int type,
++ enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; // presumably the ftype values for supportsFeature() - confirm
+ bool supportsFeature(int ftype);
+ size_t computeUnits();
+ size_t maxWorkGroupSize();
+ void* oclContext(); // untyped handle; see the getoclContext() note about sharing cl_context with other OpenCL programs
+ void* oclCommandQueue(); // untyped handle; see the getoclCommandQueue() note about sharing cl_command_queue
+ };
+
+ //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ const char **source, String kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args,
+ int channels, int depth, const char *build_options,
+ bool finish = true, bool measureKernelTime = false,
+ bool cleanUp = true);
+
+ //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ const char **fileName, const int numFiles, String kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args,
+ int channels, int depth, const char *build_options,
+ bool finish = true, bool measureKernelTime = false,
+ bool cleanUp = true);
+
+ class CV_EXPORTS oclMatExpr;
+ //////////////////////////////// oclMat ////////////////////////////////
+ class CV_EXPORTS oclMat // matrix type whose data member points to an OCL memory object
+ {
+ public:
+ //! default constructor
+ oclMat();
+ //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ oclMat(int rows, int cols, int type);
+ oclMat(Size size, int type);
+ //! constructs oclMatrix and fills it with the specified value _s.
+ oclMat(int rows, int cols, int type, const Scalar &s);
+ oclMat(Size size, int type, const Scalar &s);
+ //! copy constructor
+ oclMat(const oclMat &m);
+
+ //! constructor for oclMatrix headers pointing to user-allocated data
+ oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+
+ //! creates a matrix header for a part of the bigger matrix
+ oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ oclMat(const oclMat &m, const Rect &roi);
+
+ //! builds oclMat from Mat. Performs a blocking upload to the device.
+ explicit oclMat (const Mat &m);
+
+ //! destructor - calls release()
+ ~oclMat();
+
+ //! assignment operators
+ oclMat &operator = (const oclMat &m);
+ //! assignment operator. Performs a blocking upload to the device.
+ oclMat &operator = (const Mat &m);
+ oclMat &operator = (const oclMatExpr& expr);
+
+ //! performs a blocking upload of data to oclMat.
+ void upload(const cv::Mat &m);
+
+
+ //! downloads data from device to host memory. Blocking calls.
+ operator Mat() const;
+ void download(cv::Mat &m) const;
+
+
+ //! returns a new oclMatrix header for the specified row
+ oclMat row(int y) const;
+ //! returns a new oclMatrix header for the specified column
+ oclMat col(int x) const;
+ //! ... for the specified row span
+ oclMat rowRange(int startrow, int endrow) const;
+ oclMat rowRange(const Range &r) const;
+ //! ... for the specified column span
+ oclMat colRange(int startcol, int endcol) const;
+ oclMat colRange(const Range &r) const;
+
+ //! returns deep copy of the oclMatrix, i.e. the data is copied
+ oclMat clone() const;
+ //! copies the oclMatrix content to "m".
+ // It calls m.create(this->size(), this->type()).
+ // It supports any data type
+ void copyTo( oclMat &m ) const;
+ //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ void copyTo( oclMat &m, const oclMat &mask ) const;
+ //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+
+ void assignTo( oclMat &m, int type = -1 ) const;
+
+ //! sets every oclMatrix element to s
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& operator = (const Scalar &s);
+ //! sets some of the oclMatrix elements to s, according to the mask
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ //! creates alternative oclMatrix header for the same data, with different
+ // number of channels and/or different number of rows. see cvReshape.
+ oclMat reshape(int cn, int rows = 0) const;
+
+ //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ // previous data is unreferenced if needed.
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+
+ //! allocates new oclMatrix with specified device memory type.
- void createEx(Size size, int type, DevMemRW rw_type,
++ void createEx(int rows, int cols, int type,
+ DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
-
++ void createEx(Size size, int type, DevMemRW rw_type,
+ DevMemType mem_type, void* hptr = 0);
+
+ //! decreases reference counter;
+ // deallocate the data when reference counter reaches 0.
+ void release();
+
+ //! swaps with other smart pointer
+ void swap(oclMat &mat);
+
+ //! locates oclMatrix header within a parent oclMatrix. See below
+ void locateROI( Size &wholeSize, Point &ofs ) const;
+ //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ //! extracts a rectangular sub-oclMatrix
+ // (this is a generalized form of row, rowRange etc.)
+ oclMat operator()( Range rowRange, Range colRange ) const;
+ oclMat operator()( const Rect &roi ) const;
+
+ oclMat& operator+=( const oclMat& m );
+ oclMat& operator-=( const oclMat& m );
+ oclMat& operator*=( const oclMat& m );
+ oclMat& operator/=( const oclMat& m );
+
+ //! returns true if the oclMatrix data is continuous
+ // (i.e. when there are no gaps between successive rows).
+ // similar to CV_IS_MAT_CONT(cvMat->type)
+ bool isContinuous() const;
+ //! returns element size in bytes,
+ // similar to CV_ELEM_SIZE(cvMat->type)
+ size_t elemSize() const;
+ //! returns the size of element channel in bytes.
+ size_t elemSize1() const;
+ //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ int type() const;
+ //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+ //! 3 channels element actually use 4 channel space
+ int ocltype() const;
+ //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+ int depth() const;
+ //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
+ int channels() const;
+ //! returns the number of channels as used by ocl: 4 for a 3-channel element,
+ //! because a 3-channel element actually uses 4-channel space
+ int oclchannels() const;
+ //! returns step/elemSize1()
+ size_t step1() const;
+ //! returns oclMatrix size:
+ // width == number of columns, height == number of rows
+ Size size() const;
+ //! returns true if oclMatrix data is NULL
+ bool empty() const;
+
+ //! returns pointer to y-th row
+ uchar* ptr(int y = 0);
+ const uchar *ptr(int y = 0) const;
+
+ //! template version of the above method
+ template<typename _Tp> _Tp *ptr(int y = 0);
+ template<typename _Tp> const _Tp *ptr(int y = 0) const;
+
+ //! matrix transposition
+ oclMat t() const;
+
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+ //! the number of rows and columns
+ int rows, cols;
+ //! a distance between successive rows in bytes; includes the gap if any
+ size_t step;
+ //! pointer to the data(OCL memory object)
+ uchar *data;
+
+ //! pointer to the reference counter;
+ // when oclMatrix points to user-allocated data, the pointer is NULL
+ int *refcount;
+
+ //! helper fields used in locateROI and adjustROI
+ //datastart and dataend are not used in current version
+ uchar *datastart;
+ uchar *dataend;
+
+ //! OpenCL context associated with the oclMat object.
+ Context *clCxt;
+ //offset added to handle ROI, calculated in bytes
+ int offset;
+ //wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
+ int wholerows;
+ int wholecols;
+ };
+
+
+ ///////////////////// mat split and merge /////////////////////////////////
+ //! Compose a multi-channel array from several single-channel arrays
+ // Support all types
+ CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+
+ //! Divides multi-channel array into several single-channel arrays
+ // Support all types
+ CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+
+ ////////////////////////////// Arithmetics ///////////////////////////////////
+ //#if defined DOUBLE_SUPPORT
+ //typedef double F;
+ //#else
+ //typedef float F;
+ //#endif
+ // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c);
+ CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c);
+ //! adds one matrix to another (c = a + b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
+ //! adds one matrix to another (c = a + b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ //! adds scalar to a matrix (c = a + s)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ //! subtracts one matrix from another (c = a - b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
+ //! subtracts one matrix from another (c = a - b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ //! subtracts scalar from a matrix (c = a - s)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ //! subtracts scalar from a matrix (c = a - s)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
+ //! computes element-wise product of the two arrays (c = a * b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
++ //! multiplies matrix to a number (dst = scalar * src)
++ // supports CV_32FC1 only
++ CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
+ //! computes element-wise quotient of the two arrays (c = a / b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ //! computes element-wise quotient of the two arrays (c = a / b)
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
+
+ //! compares elements of two arrays (c = a <cmpop> b)
+ // supports except CV_8SC1,CV_8SC2,CV8SC3,CV_8SC4 types
+ CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
+
+ //! transposes the matrix
+ // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda)
+ CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+
+ //! computes element-wise absolute difference of two arrays (c = abs(a - b))
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
+ //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+ // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
+
+ //! computes mean value and standard deviation of all or selected array elements
+ // supports except CV_32F,CV_64F
+ CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+
+ //! computes norm of array
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports only CV_8UC1 type
+ CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+
+ //! computes norm of the difference between two arrays
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports only CV_8UC1 type
+ CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+
+ //! reverses the order of the rows, columns or both in a matrix
+ // supports all types
+ CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
+
+ //! computes sum of array elements
+ // disabled until fix crash
+ // support all types
+ CV_EXPORTS Scalar sum(const oclMat &m);
+ CV_EXPORTS Scalar absSum(const oclMat &m);
+ CV_EXPORTS Scalar sqrSum(const oclMat &m);
+
+ //! finds global minimum and maximum array elements and returns their values
+ // support all C1 types
+
+ CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+
+ //! finds global minimum and maximum array elements and returns their values with locations
+ // support all C1 types
+
+ CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ const oclMat &mask = oclMat());
+
+ //! counts non-zero array elements
+ // support all types
+ CV_EXPORTS int countNonZero(const oclMat &src);
+
+ //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+ // destination array will have the depth type as lut and the same channels number as source
+ //It supports 8UC1 8UC4 only
+ CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
+
+ //! only 8UC1 and 256 bins is supported now
+ CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ //! only 8UC1 and 256 bins is supported now
+ CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+ //! bilateralFilter
+ // supports 8UC1 8UC4
+ CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+ //! computes exponent of each matrix element (b = e**a)
+ // supports only CV_32FC1 type
+ CV_EXPORTS void exp(const oclMat &a, oclMat &b);
+
+ //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+ // supports only CV_32FC1 type
+ CV_EXPORTS void log(const oclMat &a, oclMat &b);
+
+ //! computes magnitude of each (x(i), y(i)) vector
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
+
+ CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
+
+ //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+
+ //! the function raises every element of the input array to p
+ //! support only CV_32F CV_64F type
+ CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+
+ //! converts Cartesian coordinates to polar
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+
+ //! converts polar coordinates to Cartesian
+ // supports only CV_32F CV_64F type
+ CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
+
+ //! performs per-element bit-wise inversion
+ // supports all types
+ CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+ //! calculates per-element bit-wise disjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ //! calculates per-element bit-wise conjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ //! calculates per-element bit-wise "exclusive or" operation
+ // supports all types
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! Logical operators
+ CV_EXPORTS oclMat operator ~ (const oclMat &);
+ CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+
+
+ //! Mathematics operators
+ CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+
+ struct CV_EXPORTS ConvolveBuf // bundle of sizes and oclMat buffers; taken by reference by the five-argument convolve() overload
+ {
+ Size result_size;
+ Size block_size;
+ Size user_block_size;
+ Size dft_size;
+
+ oclMat image_spect, templ_spect, result_spect; // presumably spectrum buffers for image, template and result - confirm in the implementation
+ oclMat image_block, templ_block, result_data;
+
+ void create(Size image_size, Size templ_size); // takes the image and template sizes; allocation details are not visible here
+ static Size estimateBlockSize(Size result_size, Size templ_size); // static helper: needs no ConvolveBuf instance
+ };
+
+ //! computes convolution of two images, may use discrete Fourier transform
+ //! support only CV_32FC1 type
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
+
+ //! Performs a per-element multiplication of two Fourier spectrums.
+ //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
+ //! support only CV_32FC2 type
+ CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
+
+ CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); // conversion selected by code; dcn defaults to 0 (NOTE(review): presumably "derive channel count from code" - confirm)
+
+ //////////////////////////////// Filter Engine ////////////////////////////////
+
+ /*!
+ The Base Class for 1D or Row-wise Filters
+
+ This is the base class for linear or non-linear filters that process 1D data.
+ In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+
+ The constructor only stores its three arguments in the public members ksize, anchor and bordertype.
+ operator()(src, dst) is pure virtual, so the actual filtering is supplied by derived classes.
+ */
+ class CV_EXPORTS BaseRowFilter_GPU
+ {
+ public:
+ BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseRowFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Column-wise Filters
+
+ This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ Such filters are used for the "vertical" filtering parts in separable filters.
+
+ The constructor only stores its three arguments in the public members ksize, anchor and bordertype.
+ operator()(src, dst) is pure virtual, so the actual filtering is supplied by derived classes.
+ */
+ class CV_EXPORTS BaseColumnFilter_GPU
+ {
+ public:
+ BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseColumnFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Non-Separable 2D Filters.
+
+ This is the base class for linear or non-linear 2D filters.
+
+ Unlike the row and column base classes, the kernel size is a Size and the anchor is a Point.
+ The constructor only stores its arguments in the public members ksize, anchor and borderType.
+ operator()(src, dst) is pure virtual, so the actual filtering is supplied by derived classes.
+ */
+ class CV_EXPORTS BaseFilter_GPU
+ {
+ public:
+ BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ virtual ~BaseFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ Size ksize;
+ Point anchor;
+ int borderType;
+ };
+
+ /*!
+ The Base Class for Filter Engine.
+
+ The class can be used to apply an arbitrary filtering operation to an image.
+ It contains all the necessary intermediate buffers.
+
+ This interface itself declares only a virtual destructor and the pure virtual apply(src, dst, roi).
+ roi defaults to Rect(0, 0, -1, -1); how that default is interpreted is up to the implementing class.
+ */
+ class CV_EXPORTS FilterEngine_GPU
+ {
+ public:
+ virtual ~FilterEngine_GPU() {}
+
+ virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ };
+
+ //! Returns the non-separable filter engine that wraps the specified 2D filter.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+
+ //! Returns the primitive row filter with the specified kernel (anchor defaults to -1, bordertype to BORDER_DEFAULT).
+ CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT);
+
+ //! Returns the primitive column filter with the specified kernel (anchor defaults to -1, delta to 0.0).
+ CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+
+ //! Returns the separable linear filter engine built from a row kernel and a column kernel.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
+ const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! Returns the separable filter engine that combines the specified row and column filters.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter);
+
+ //! Returns the Gaussian filter engine (sigma2 defaults to 0, bordertype to BORDER_DEFAULT).
+ CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! Returns the filter engine for the generalized Sobel operator (derivative orders dx and dy, kernel size ksize).
+ CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
+
+ //! Applies the Laplacian operator to the image.
+ // Supports only ksize = 1 and ksize = 3; supported data types are CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
+
+ //! Returns the 2D box filter.
+ // Supports CV_8UC1 and CV_8UC4 source types; the dst type must be the same as the source type.
+ CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Returns the box filter engine (same parameters as getBoxFilter_GPU, but yields a FilterEngine_GPU).
+ CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Returns the 2D filter with the specified kernel.
+ // Supports CV_8UC1 and CV_8UC4 types.
+ CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Returns the non-separable linear filter engine for the specified kernel.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Smooths the image using the normalized box filter.
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ // Supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101, BORDER_WRAP.
+ CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Returns the 2D morphological filter.
+ //! Only MORPH_ERODE and MORPH_DILATE are supported.
+ // Supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types.
+ // The kernel must have CV_8UC1 type, a single row, and cols == ksize.width * ksize.height.
+ CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ Point anchor = Point(-1, -1));
+
+ //! Returns the morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported. iterations defaults to 1.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int iterations = 1);
+
+ //! Convenience alias for the normalized box filter: forwards to boxFilter() with ddepth fixed to -1.
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ // Supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101.
+ // Note that borderType defaults to BORDER_CONSTANT here, whereas boxFilter() defaults to BORDER_DEFAULT.
+ static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ int borderType = BORDER_CONSTANT)
+ {
+ const int ddepth = -1; // the one argument blur() does not expose to its caller
+ boxFilter(src, dst, ddepth, ksize, anchor, borderType);
+ }
+
+ //! Applies a non-separable 2D linear filter (kernel) to the image.
+ CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! Applies a separable 2D linear filter (kernelX and kernelY) to the image.
+ CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! Applies the generalized Sobel operator to the image.
+ // dst.type must equal src.type.
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ // Supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101.
+ CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! Applies the vertical or horizontal Scharr operator to the image.
+ // dst.type must equal src.type.
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ // Supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101.
+ CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! Smooths the image using a Gaussian filter.
+ // dst.type must equal src.type.
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ // Supported border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101.
+ CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! Erodes the image (applies the local minimum operator).
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! Dilates the image (applies the local maximum operator).
+ // Supported data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+ CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! Applies an advanced morphological operation (selected by op) to the image; defaults match erode() and dilate().
+ CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ ////////////////////////////// Image processing //////////////////////////////
+ //! Does mean shift filtering on GPU. The default criteria stop on MAX_ITER + EPS with 5 iterations and epsilon 1.
+ CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift procedure on GPU. Takes two output matrices, dstr and dstsp.
+ CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift segmentation with elimination of small regions. Note that dst is a host Mat, not an oclMat.
+ CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Applies a fixed threshold to the image and returns a double. type defaults to THRESH_TRUNC.
+ // Supports CV_8UC1 and CV_32FC1 data types.
+ // Supported threshold types: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV.
+ CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+
+ //! Resizes the image.
+ // Supports INTER_NEAREST and INTER_LINEAR.
+ // Supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types.
+ CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+
+ //! Applies a generic geometrical transformation to an image.
+
+ // Supports INTER_NEAREST and INTER_LINEAR.
+
+ // map1 supports CV_16SC2 and CV_32FC2 types.
+
+ // src supports CV_8UC1, CV_8UC2 and CV_8UC4.
+
+ CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+
+ //! Copies a 2D array to a larger destination array and pads the borders with a user-specifiable constant.
+ // Supports CV_8UC1, CV_8UC4 and CV_32SC1 types. NOTE(review): the parameter is spelled "boardtype"; presumably the border type.
+ CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+
+ //! Smooths the image using a median filter.
+ // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
+ CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+
+ //! Warps the image using an affine transformation M.
+ // Supports INTER_NEAREST, INTER_LINEAR and INTER_CUBIC.
+ // Supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types.
+ CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+
+ //! Warps the image using a perspective transformation M.
+ // Supports INTER_NEAREST, INTER_LINEAR and INTER_CUBIC.
+ // Supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types.
+ CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+
+ //! Computes the integral image and, in the three-argument overload, the integral of the squared image.
+ // sum will have CV_32S type, sqsum will have CV_32F type.
+ // Supports only the CV_8UC1 source type.
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+ CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); // undocumented here; NOTE(review): presumably mirrors cv::cornerHarris - confirm
+ CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); // undocumented here; NOTE(review): presumably mirrors cv::cornerMinEigenVal - confirm
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if 0 // Compiled out: nothing between here and the matching #endif reaches the compiler.
+ class CV_EXPORTS OclCascadeClassifier : public cv::CascadeClassifier
+ {
+ public:
+ OclCascadeClassifier() {};
+ ~OclCascadeClassifier() {};
+
+ CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
+ int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ };
+#endif // NOTE(review): confirm whether this disabled class can be deleted outright
+
++ // OpenCL cascade classifier derived from cv::CascadeClassifier. Besides the inherited interface it
++ // carries its own private state: image geometry, detection parameters, per-scale size/scale vectors,
++ // three oclMat work images and an opaque buffers pointer.
++ // NOTE(review): the destructor is empty and does not call release(); confirm whether callers are
++ // expected to call release() themselves, and whether copying an instance (which would copy the raw
++ // buffers pointer) is meant to be supported.
++ class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
++ {
++ public:
++ // Gives every scalar member a defined value, listed in declaration order, so that none of the
++ // int/bool fields can be read with an indeterminate value before it is first assigned.
++ OclCascadeClassifierBuf() :
++ m_rows(0), m_cols(0), m_flags(0), m_loopcount(0), m_nodenum(0),
++ findBiggestObject(false), initialized(false), m_scaleFactor(0), buffers(NULL) {}
+
++ ~OclCascadeClassifierBuf() {}
++
++ // Detection entry point: takes the image as an oclMat and writes the resulting rectangles to faces.
++ // Defaults: scaleFactor 1.1, minNeighbors 3, flags 0, default-constructed minSize and maxSize.
++ void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
++ double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
++ Size minSize = Size(), Size maxSize = Size());
++ // Defined out of line. NOTE(review): presumably frees whatever buffers points to - confirm.
++ void release();
++
++ private:
++ // Internal helpers, all defined out of line.
++ void Init(const int rows, const int cols, double scaleFactor, int flags,
++ const int outputsz, const size_t localThreads[],
++ Size minSize, Size maxSize);
++ void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
++ void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
++ const double scaleFactor, const size_t localThreads[],
++ Size minSize, Size maxSize);
++ void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
++
++ int m_rows;
++ int m_cols;
++ int m_flags;
++ int m_loopcount;
++ int m_nodenum;
++ bool findBiggestObject;
++ bool initialized; // false until set elsewhere; the constructor starts it as false
++ double m_scaleFactor;
++ Size m_minSize;
++ Size m_maxSize;
++ std::vector<Size> sizev;
++ std::vector<float> scalev;
++ oclMat gimg1, gsum, gsqsum;
++ void * buffers; // opaque pointer, NULL after construction
++ };
+
+ /////////////////////////////// Pyramid /////////////////////////////////////
+ CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); // undocumented here; NOTE(review): presumably the downsampling counterpart of pyrUp - confirm
+
+ //! Upsamples the source image and then smooths it.
+ CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+
+ //! Performs linear blending of two images.
+ //! To avoid accuracy errors, the sum of weights shouldn't be very close to zero.
+ // Supports only the CV_8UC1 source type.
+ CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+
+ //! Computes the vertical sum; supports only CV_32FC1 images.
+ CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+
+ ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ struct CV_EXPORTS MatchTemplateBuf // Plain aggregate of buffers taken by the MatchTemplateBuf overload of matchTemplate().
+ {
+ Size user_block_size;
+ oclMat imagef, templf;
+ std::vector<oclMat> images;
+ std::vector<oclMat> image_sums;
+ std::vector<oclMat> image_sqsums;
+ };
+
-
+ //! Computes the proximity map for the raster template and the image where the template is searched for.
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for types 8UC1 and 8UC4.
+ // Supports TM_SQDIFF, TM_CCORR for types 32FC1 and 32FC4.
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+
+ //! Same as the four-argument matchTemplate, but takes a caller-provided MatchTemplateBuf.
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for types 8UC1 and 8UC4.
+ // Supports TM_SQDIFF, TM_CCORR for types 32FC1 and 32FC4.
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+
+
+
+ ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+ struct CV_EXPORTS CannyBuf; // forward declaration; the full definition follows the Canny overloads
+
+ //! Computes the edges of the input image using the Canny operator.
+ // Supports CV_8UC1 only. Overloads take either the image or precomputed dx/dy, each with or without a CannyBuf.
+ CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+ struct CV_EXPORTS CannyBuf // Intermediate buffers taken by the CannyBuf overloads of Canny().
+ {
+ CannyBuf() : counter(NULL) {} // leaves every buffer default-constructed; counter starts as NULL
+ ~CannyBuf()
+ {
+ release(); // the destructor always calls release()
+ }
+ explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
+ {
+ create(image_size, apperture_size); // forwards both arguments to create()
+ }
+ CannyBuf(const oclMat &dx_, const oclMat &dy_); // defined out of line
+ void create(const Size &image_size, int apperture_size = 3); // defined out of line
+ void release(); // defined out of line
+
+ oclMat dx, dy;
+ oclMat dx_buf, dy_buf;
+ oclMat magBuf, mapBuf;
+ oclMat trackBuf1, trackBuf2;
+ void *counter; // opaque pointer; NOTE(review): no copy constructor/assignment is declared, so copies share it - confirm release() tolerates that
+ Ptr<FilterEngine_GPU> filterDX, filterDY;
+ };
+
+ ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ //! HoughCircles: buffer aggregate (four oclMat members plus a CannyBuf) and the detection entry points.
+ struct HoughCirclesBuf
+ {
+ oclMat edges;
+ oclMat accum;
+ oclMat srcPoints;
+ oclMat centers;
+ CannyBuf cannyBuf;
+ };
+
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); // maxCircles defaults to 4096
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); // same, with a caller-provided HoughCirclesBuf
+ CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles); // NOTE(review): presumably copies the device result d_circles to the host array h_circles - confirm
+
+
+ ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating point matrix.
+ //! Param dft_size is the size of the DFT transform.
+ //!
+ //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ // Supported src types: CV_32FC1, CV_32FC2.
+ // Supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS.
+ // dft_size is the size of the original input, which is used for the transformation from complex to real.
+ // dft_size must be a product of powers of 2, 3 and 5.
+ // Real-to-complex dft requires at least v1.8 of clAmdFft.
+ // Real-to-complex dft output is not the same as the CPU version's.
+ // Real-to-complex and complex-to-real do not support DFT_ROWS.
+ CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
+
+ //! Implements the generalized matrix product algorithm GEMM from BLAS.
+ // This functionality requires the clAmdBlas library.
+ // Supports only the CV_32FC1 type.
+ // The flag GEMM_3_T is not supported.
+ CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ const oclMat &src3, double beta, oclMat &dst, int flags = 0);
+
+ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
+ struct CV_EXPORTS HOGDescriptor
+
+ {
+ // Named defaults used by the constructor and getDescriptors() declarations in this struct.
+ enum { DEFAULT_WIN_SIGMA = -1 };
+
+ enum { DEFAULT_NLEVELS = 64 };
+
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+
+ // Every constructor argument has a default and a same-named public data member in this struct.
+ HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+
+ Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+
+ int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+
+ double threshold_L2hys = 0.2, bool gamma_correction = true,
+
+ int nlevels = DEFAULT_NLEVELS);
+
+
+
+ size_t getDescriptorSize() const;
+
+ size_t getBlockHistogramSize() const;
+
+
+
+ void setSVMDetector(const std::vector<float> &detector);
+
+
+ // Static accessors returning detector coefficients as std::vector<float>.
+ static std::vector<float> getDefaultPeopleDetector();
+
+ static std::vector<float> getPeopleDetector48x96();
+
+ static std::vector<float> getPeopleDetector64x128();
+
+
+ // Writes found_locations as points; hit_threshold defaults to 0, win_stride and padding to Size().
+ void detect(const oclMat &img, std::vector<Point> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size());
+
+
+ // Writes found_locations as rectangles; adds scale0 (default 1.05) and group_threshold (default 2).
+ void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size(), double scale0 = 1.05,
+
+ int group_threshold = 2);
+
+
+ // descr_format takes one of the DESCR_FORMAT_* values and defaults to DESCR_FORMAT_COL_BY_COL.
+ void getDescriptors(const oclMat &img, Size win_stride,
+
+ oclMat &descriptors,
+
+ int descr_format = DESCR_FORMAT_COL_BY_COL);
+
+
+
+ Size win_size;
+
+ Size block_size;
+
+ Size block_stride;
+
+ Size cell_size;
+
+ int nbins;
+
+ double win_sigma;
+
+ double threshold_L2hys;
+
+ bool gamma_correction;
+
+ int nlevels;
+
+
+
+ protected:
+
+ // Initialize buffers; this only needs to be done once in the case of multiscale detection
+
+ void init_buffer(const oclMat &img, Size win_stride);
+
+
+
+ void computeBlockHistograms(const oclMat &img);
+
+ void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+
+
+
+ double getWinSigma() const;
+
+ bool checkDetectorSize() const;
+
+
+
+ static int numPartsWithin(int size, int part_size, int stride);
+
+ static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+
+
+ // Coefficients of the separating plane
+
+ float free_coef;
+
+ oclMat detector;
+
+
+
+ // Results of the last classification step
+
+ oclMat labels;
+
+ Mat labels_host;
+
+
+
+ // Results of the last histogram evaluation step
+
+ oclMat block_hists;
+
+
+
+ // Gradient computation results
+
+ oclMat grad, qangle;
+
+
+
+ // Scaled image
+
+ oclMat image_scale;
+
+
+
+ // Effective size of the input image (might be different from the original size after scaling)
+
+ Size effect_size;
+
+ };
+
+
+ ////////////////////////feature2d_ocl/////////////////
+ /****************************************************************************************\
+ * Distance *
+ \****************************************************************************************/
-
+ template<typename T>
+ struct CV_EXPORTS Accumulator // primary template: the accumulator type is T itself
+ {
+ typedef T Type;
+ };
-
+ template<> struct Accumulator<unsigned char> // unsigned char accumulates in float
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<unsigned short> // unsigned short accumulates in float
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<char> // char accumulates in float
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<short> // short accumulates in float
+ {
+ typedef float Type;
+ };
+
+ /*
+ * Manhattan (city block) distance functor.
+ * ValueType is the element type T; ResultType is Accumulator<T>::Type.
+ * operator() hands both arrays and their length to normL1 and returns its result.
+ */
+ template<class T>
+ struct CV_EXPORTS L1
+ {
+ enum { normType = NORM_L1 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ const ResultType cityBlock = normL1<ValueType, ResultType>(a, b, size);
+ return cityBlock;
+ }
+ };
+
+ /*
+ * Euclidean distance functor.
+ * ValueType is the element type T; ResultType is Accumulator<T>::Type.
+ * operator() takes the squared distance from normL2Sqr, converts it to double,
+ * applies std::sqrt and casts the root back to ResultType.
+ */
+ template<class T>
+ struct CV_EXPORTS L2
+ {
+ enum { normType = NORM_L2 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ const double squared = (double)normL2Sqr<ValueType, ResultType>(a, b, size);
+ return (ResultType)std::sqrt(squared);
+ }
+ };
+
+ /*
+ * Hamming distance functor - counts the bit differences between two byte strings,
+ * i.e. the bit count of a XOR b. Useful for the Brief descriptor.
+ * ValueType is unsigned char and ResultType is int.
+ */
+ struct CV_EXPORTS Hamming
+ {
+ enum { normType = NORM_HAMMING };
+ typedef unsigned char ValueType;
+ typedef int ResultType;
+
+ /** Returns normHamming(a, b, size): the number of set bits in a ^ b.
+ */
+ ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ {
+ const ResultType differingBits = normHamming(a, b, size);
+ return differingBits;
+ }
+ };
+
+ ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+
+ class CV_EXPORTS BruteForceMatcher_OCL_base
+ {
+ public:
+ enum DistType {L1Dist = 0, L2Dist, HammingDist};
-
-
+ explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+
-
+ // Add descriptors to train descriptor collection
-
-
+ void add(const std::vector<oclMat> &descCollection);
+
-
+ // Get train descriptors collection
-
-
+ const std::vector<oclMat> &getTrainDescriptors() const;
+
-
+ // Clear train descriptors collection
-
-
+ void clear();
+
-
+ // Return true if there are not train descriptors in collection
-
-
+ bool empty() const;
+
-
+ // Return true if the matcher supports mask in match methods
-
-
+ bool isMaskSupported() const;
+
-
+ // Find one best match for each query descriptor
-
+ void matchSingle(const oclMat &query, const oclMat &train,
-
+ oclMat &trainIdx, oclMat &distance,
-
-
+ const oclMat &mask = oclMat());
+
-
+ // Download trainIdx and distance and convert it to CPU vector with DMatch
-
+ static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
+ // Convert trainIdx and distance to vector with DMatch
-
-
+ static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+
-
+ // Find one best match for each query descriptor
-
-
+ void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+
-
+ // Make gpu collection of trains and masks in suitable format for matchCollection function
-
-
+ void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
-
+ // Find one best match from train collection for each query descriptor
-
+ void matchCollection(const oclMat &query, const oclMat &trainCollection,
-
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
-
+ const oclMat &masks = oclMat());
+
-
+ // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-
+ static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
+ // Convert trainIdx, imgIdx and distance to vector with DMatch
-
-
+ static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+
-
+ // Find one best match from train collection for each query descriptor.
-
-
+ void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
-
+ // Find k best matches for each query descriptor (in increasing order of distances)
-
+ void knnMatchSingle(const oclMat &query, const oclMat &train,
-
+ oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
-
-
+ const oclMat &mask = oclMat());
+
-
+ // Download trainIdx and distance and convert it to vector with DMatch
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
+ // Convert trainIdx and distance to vector with DMatch
-
+ static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
-
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
-
+ // Find k best matches for each query descriptor (in increasing order of distances).
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ void knnMatch(const oclMat &query, const oclMat &train,
-
+ std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
-
-
+ bool compactResult = false);
+
-
+ // Find k best matches from train collection for each query descriptor (in increasing order of distances)
-
+ void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
-
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
-
+ const oclMat &maskCollection = oclMat());
+
-
+ // Download trainIdx and distance and convert it to vector with DMatch
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
+ // Convert trainIdx and distance to vector with DMatch
-
+ static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
-
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
-
+ // Find k best matches for each query descriptor (in increasing order of distances).
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
-
-
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
-
+ // Find best matches for each query descriptor which have distance less than maxDistance.
-
+ // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-
+ // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
-
+ // because it didn't have enough memory.
-
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-
+ // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-
+ // Matches doesn't sorted.
-
+ void radiusMatchSingle(const oclMat &query, const oclMat &train,
-
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
-
+ const oclMat &mask = oclMat());
+
-
+ // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-
+ // matches will be sorted in increasing order of distances.
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
-
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
-
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
-
+ // Find best matches for each query descriptor which have distance less than maxDistance
-
+ // in increasing order of distances).
-
+ void radiusMatch(const oclMat &query, const oclMat &train,
-
+ std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
-
+ const oclMat &mask = oclMat(), bool compactResult = false);
+
-
+ // Find best matches for each query descriptor which have distance less than maxDistance.
-
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-
+ // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-
+ // Matches are not sorted.
-
+ void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
-
+ const std::vector<oclMat> &masks = std::vector<oclMat>());
+
-
+ // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-
+ // matches will be sorted in increasing order of distances.
-
+ // compactResult is used when mask is not empty. If compactResult is false matches
-
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
-
+ // matches vector will not contain matches for fully masked out query descriptors.
-
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
-
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
-
-
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
-
+ // Find best matches from train collection for each query descriptor which have distance less than
-
+ // maxDistance (in increasing order of distances).
-
+ void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
-
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
-
-
+ DistType distType;
+
-
+ private:
-
+ std::vector<oclMat> trainDescCollection;
-
-
+ };
+
-
+ template <class Distance>
-
-
+ class CV_EXPORTS BruteForceMatcher_OCL;
+
-
+ template <typename T>
-
+ class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
-
+ {
-
+ public:
-
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
-
+ explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
-
+ };
-
+ template <typename T>
-
+ class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
-
+ {
-
+ public:
-
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
-
+ explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
-
+ };
-
+ template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
-
+ {
-
+ public:
-
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
-
+ explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
-
+ };
+
-
++ class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base // non-template matcher: the distance type is picked at run time from a norm code
++ {
++ public:
++ explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} // NORM_L1 -> L1Dist, NORM_L2 -> L2Dist, any other norm value falls through to HammingDist
++ };
+
+ /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+
+ class CV_EXPORTS PyrLKOpticalFlow
-
+ {
-
+ public:
-
+ PyrLKOpticalFlow()
-
+ {
-
+ winSize = Size(21, 21);
-
+ maxLevel = 3;
-
+ iters = 30;
-
+ derivLambda = 0.5;
-
+ useInitialFlow = false;
-
+ minEigThreshold = 1e-4f;
-
+ getMinEigenVals = false;
-
+ isDeviceArch11_ = false;
-
-
+ }
+
-
+ void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
-
-
+ oclMat &status, oclMat *err = 0);
+
-
-
+ void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
+
-
+ Size winSize;
-
+ int maxLevel;
-
+ int iters;
-
+ double derivLambda;
-
+ bool useInitialFlow;
-
+ float minEigThreshold;
-
-
+ bool getMinEigenVals;
+
-
+ void releaseMemory()
-
+ {
-
+ dx_calcBuf_.release();
-
-
+ dy_calcBuf_.release();
+
-
+ prevPyr_.clear();
-
-
+ nextPyr_.clear();
+
-
+ dx_buf_.release();
-
+ dy_buf_.release();
-
-
+ }
+
-
+ private:
-
-
+ void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+
-
-
+ void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+
-
+ oclMat dx_calcBuf_;
-
-
+ oclMat dy_calcBuf_;
+
-
+ std::vector<oclMat> prevPyr_;
-
-
+ std::vector<oclMat> nextPyr_;
+
-
+ oclMat dx_buf_;
-
-
+ oclMat dy_buf_;
+
-
+ oclMat uPyr_[2];
-
-
+ oclMat vPyr_[2];
+
-
+ bool isDeviceArch11_;
+ };
+ //////////////// build warping maps ////////////////////
+ //! builds plane warping maps
+ CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds cylindrical warping maps
+ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds spherical warping maps
+ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds Affine warping maps
+ CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ //! builds Perspective warping maps
+ CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ //! Interpolate frames (images) using provided optical flow (displacement field).
+ //! frame0 - frame 0 (32-bit floating point images, single channel)
+ //! frame1 - frame 1 (the same type and size)
+ //! fu - forward horizontal displacement
+ //! fv - forward vertical displacement
+ //! bu - backward horizontal displacement
+ //! bv - backward vertical displacement
+ //! pos - new frame position
+ //! newFrame - new frame
+ //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
+ //! occlusion masks 0, occlusion masks 1,
+ //! interpolated forward flow 0, interpolated forward flow 1,
+ //! interpolated backward flow 0, interpolated backward flow 1
+ //!
+ CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ const oclMat &fu, const oclMat &fv,
+ const oclMat &bu, const oclMat &bv,
+ float pos, oclMat &newFrame, oclMat &buf);
+
+ //! computes moments of the rasterized shape or a vector of points
+ CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
+
+ class CV_EXPORTS StereoBM_OCL // block-matching stereo correspondence on oclMat inputs
+ {
+ public:
+ enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; // NOTE(review): presumably the values accepted for the `preset` argument/field - confirm
+
+ enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; // defaults for the ndisparities and winSize constructor arguments
+
+ //! the default constructor
+ StereoBM_OCL();
+ //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+ StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+
+ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
+ //! Output disparity has CV_8U type.
+ void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+
+ //! A heuristic that tries to estimate
+ // whether the current GPU will be faster than the CPU for this algorithm.
+ // It queries the currently active device.
+ static bool checkIfGpuCallReasonable();
+
+ int preset;
+ int ndisp;
+ int winSize;
+
+ // If avergeTexThreshold == 0 => post-processing is disabled.
+ // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for the left image
+ // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
+ // i.e. the input left image is low textured.
+ float avergeTexThreshold;
+ private:
+ oclMat minSSD, leBuf, riBuf; // internal oclMat buffers; how they are used is not visible in this header
+ };
++
+ class CV_EXPORTS StereoBeliefPropagation // stereo matcher operating on oclMat; its tuning parameters are exposed as public fields
+ {
+ public:
+ enum { DEFAULT_NDISP = 64 }; // default for the ndisp constructor argument
+ enum { DEFAULT_ITERS = 5 }; // default for the iters constructor argument
+ enum { DEFAULT_LEVELS = 5 }; // default for the levels constructor argument
+ static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); // writes suggested ndisp/iters/levels through the reference arguments - NOTE(review): presumably derived from the given image size; confirm in the implementation
+ explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
+ int iters = DEFAULT_ITERS,
+ int levels = DEFAULT_LEVELS,
+ int msg_type = CV_16S); // note: msg_type defaults to CV_16S here but to CV_32F in the overload below
+ StereoBeliefPropagation(int ndisp, int iters, int levels,
+ float max_data_term, float data_weight,
+ float max_disc_term, float disc_single_jump,
+ int msg_type = CV_32F); // overload that additionally sets the four cost-term parameters
+ void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); // takes a left/right image pair and writes the result into disparity
+ void operator()(const oclMat &data, oclMat &disparity); // overload taking a single `data` matrix - NOTE(review): presumably a precomputed data cost; confirm
+ int ndisp;
+ int iters;
+ int levels;
+ float max_data_term;
+ float data_weight;
+ float max_disc_term;
+ float disc_single_jump;
+ int msg_type; // an OpenCV depth code (the constructors default it to CV_16S / CV_32F)
+ private:
+ oclMat u, d, l, r, u2, d2, l2, r2; // internal buffers - NOTE(review): names suggest up/down/left/right messages; confirm in the implementation
+ std::vector<oclMat> datas;
+ oclMat out;
+ };
++
++ class CV_EXPORTS StereoConstantSpaceBP // stereo matcher operating on oclMat; its tuning parameters are exposed as public fields
++ {
++ public:
++ enum { DEFAULT_NDISP = 128 }; // default for the ndisp constructor argument
++ enum { DEFAULT_ITERS = 8 }; // default for the iters constructor argument
++ enum { DEFAULT_LEVELS = 4 }; // default for the levels constructor argument
++ enum { DEFAULT_NR_PLANE = 4 }; // default for the nr_plane constructor argument
++ static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); // writes suggested parameters through the reference arguments - NOTE(review): presumably derived from the given image size; confirm in the implementation
++ explicit StereoConstantSpaceBP(
++ int ndisp = DEFAULT_NDISP,
++ int iters = DEFAULT_ITERS,
++ int levels = DEFAULT_LEVELS,
++ int nr_plane = DEFAULT_NR_PLANE,
++ int msg_type = CV_32F);
++ StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
++ float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
++ int min_disp_th = 0,
++ int msg_type = CV_32F); // overload that additionally sets the cost-term parameters and min_disp_th
++ void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); // takes a left/right image pair and writes the result into disparity
++ int ndisp;
++ int iters;
++ int levels;
++ int nr_plane;
++ float max_data_term;
++ float data_weight;
++ float max_disc_term;
++ float disc_single_jump;
++ int min_disp_th;
++ int msg_type; // an OpenCV depth code (both constructors default it to CV_32F)
++ bool use_local_init_data_cost; // not settable through either constructor signature; its effect is not visible in this header
++ private:
++ oclMat u[2], d[2], l[2], r[2]; // internal buffers, two of each - NOTE(review): names suggest up/down/left/right messages; confirm in the implementation
++ oclMat disp_selected_pyr[2];
++ oclMat data_cost;
++ oclMat data_cost_selected;
++ oclMat temp;
++ oclMat out;
++ };
++
++ // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
++ //
++ // see reference:
++ // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
++ // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
++ class CV_EXPORTS OpticalFlowDual_TVL1_OCL
++ {
++ public:
++ OpticalFlowDual_TVL1_OCL();
++
++ void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); // takes two input images (I0, I1) and writes the flow into flowx / flowy
++
++ void collectGarbage(); // NOTE(review): presumably releases the private buffers declared below - confirm in the implementation
++
++ /**
++ * Time step of the numerical scheme.
++ */
++ double tau;
++
++ /**
++ * Weight parameter for the data term, attachment parameter.
++ * This is the most relevant parameter, which determines the smoothness of the output.
++ * The smaller this parameter is, the smoother the solutions we obtain.
++ * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
++ */
++ double lambda;
++
++ /**
++ * Weight parameter for (u - v)^2, tightness parameter.
++ * It serves as a link between the attachment and the regularization terms.
++ * In theory, it should have a small value in order to maintain both parts in correspondence.
++ * The method is stable for a large range of values of this parameter.
++ */
++ double theta;
++
++ /**
++ * Number of scales used to create the pyramid of images.
++ */
++ int nscales;
++
++ /**
++ * Number of warpings per scale.
++ * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
++ * This is a parameter that assures the stability of the method.
++ * It also affects the running time, so it is a compromise between speed and accuracy.
++ */
++ int warps;
++
++ /**
++ * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
++ * A small value will yield more accurate solutions at the expense of a slower convergence.
++ */
++ double epsilon;
++
++ /**
++ * Stopping criterion iterations number used in the numerical scheme.
++ */
++ int iterations;
++
++ bool useInitialFlow; // NOTE(review): presumably makes operator() treat the incoming flowx / flowy as an initial estimate - confirm
++
++ private:
++ void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2); // NOTE(review): presumably the per-scale solver step, given the name and the per-scale vectors below - confirm
++
++ std::vector<oclMat> I0s; // per-scale containers - NOTE(review): presumably one entry per pyramid level (see nscales); confirm
++ std::vector<oclMat> I1s;
++ std::vector<oclMat> u1s;
++ std::vector<oclMat> u2s;
++
++ oclMat I1x_buf; // scratch oclMat buffers kept as members; how they are used is not visible in this header
++ oclMat I1y_buf;
++
++ oclMat I1w_buf;
++ oclMat I1wx_buf;
++ oclMat I1wy_buf;
++
++ oclMat grad_buf;
++ oclMat rho_c_buf;
++
++ oclMat p11_buf;
++ oclMat p12_buf;
++ oclMat p21_buf;
++ oclMat p22_buf;
++
++ oclMat diff_buf;
++ oclMat norm_buf;
++ };
+ }
+}
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( push)
+# pragma warning( disable: 4267)
+#endif
+#include "opencv2/ocl/matrix_operations.hpp"
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( pop)
+#endif
+
+#endif /* __OPENCV_OCL_HPP__ */
//
//M*/
-#ifndef __OPENCV_OCL_HPP__
-#define __OPENCV_OCL_HPP__
-
-#include <memory>
-#include <vector>
-
-#include "opencv2/core/core.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/features2d/features2d.hpp"
-
-namespace cv
-{
- namespace ocl
- {
- using std::auto_ptr;
- enum
- {
- CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
- CVCL_DEVICE_TYPE_CPU = (1 << 1),
- CVCL_DEVICE_TYPE_GPU = (1 << 2),
- CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
- //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
- CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF
- };
-
- enum DevMemRW
- {
- DEVICE_MEM_R_W = 0,
- DEVICE_MEM_R_ONLY,
- DEVICE_MEM_W_ONLY
- };
-
- enum DevMemType
- {
- DEVICE_MEM_DEFAULT = 0,
- DEVICE_MEM_AHP, //alloc host pointer
- DEVICE_MEM_UHP, //use host pointer
- DEVICE_MEM_CHP, //copy host pointer
- DEVICE_MEM_PM //persistent memory
- };
-
- //Get the global device memory and read/write type
- //return 1 if unified memory system supported, otherwise return 0
- CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
-
- //Set the global device memory and read/write type,
- //the newly generated oclMat will all use this type
- //return -1 if the target type is unsupported, otherwise return 0
- CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
-
- //this class contains ocl runtime information
- class CV_EXPORTS Info
- {
- public:
- struct Impl;
- Impl *impl;
-
- Info();
- Info(const Info &m);
- ~Info();
- void release();
- Info &operator = (const Info &m);
- std::vector<string> DeviceName;
- };
- //////////////////////////////// Initialization & Info ////////////////////////
- //this function may be obsoleted
- //CV_EXPORTS cl_device_id getDevice();
- //the function must be called before any other cv::ocl::functions, it initialize ocl runtime
- //each Info relates to an OpenCL platform
- //there is one or more devices in each platform, each one has a separate name
- CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
-
- //set device you want to use, optional function after getDevice be called
- //the devnum is the index of the selected device in DeviceName vector of INfo
- CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
-
- //optional function, if you want save opencl binary kernel to the file, set its path
- CV_EXPORTS void setBinpath(const char *path);
-
- //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
- CV_EXPORTS void* getoclContext();
-
- CV_EXPORTS void* getoclCommandQueue();
-
- //explicit call clFinish. The global command queue will be used.
- CV_EXPORTS void finish();
-
- //this function enable ocl module to use customized cl_context and cl_command_queue
- //getDevice also need to be called before this function
- CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
-
- //////////////////////////////// Error handling ////////////////////////
- CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
-
- //////////////////////////////// OpenCL context ////////////////////////
- //This is a global singleton class used to represent a OpenCL context.
- class CV_EXPORTS Context
- {
- protected:
- Context();
- friend class auto_ptr<Context>;
-
- private:
- static auto_ptr<Context> clCxt;
- static int val;
- public:
- ~Context();
- void release();
- Info::Impl* impl;
-
- static Context* getContext();
- static void setContext(Info &oclinfo);
-
- enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
- bool supportsFeature(int ftype);
- size_t computeUnits();
- void* oclContext();
- void* oclCommandQueue();
- };
-
- //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
- CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
- const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- std::vector< std::pair<size_t, const void *> > &args,
- int channels, int depth, const char *build_options,
- bool finish = true, bool measureKernelTime = false,
- bool cleanUp = true);
-
- //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
- CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
- const char **fileName, const int numFiles, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- std::vector< std::pair<size_t, const void *> > &args,
- int channels, int depth, const char *build_options,
- bool finish = true, bool measureKernelTime = false,
- bool cleanUp = true);
-
- class CV_EXPORTS oclMatExpr;
- //////////////////////////////// oclMat ////////////////////////////////
- class CV_EXPORTS oclMat
- {
- public:
- //! default constructor
- oclMat();
- //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
- oclMat(int rows, int cols, int type);
- oclMat(Size size, int type);
- //! constucts oclMatrix and fills it with the specified value _s.
- oclMat(int rows, int cols, int type, const Scalar &s);
- oclMat(Size size, int type, const Scalar &s);
- //! copy constructor
- oclMat(const oclMat &m);
-
- //! constructor for oclMatrix headers pointing to user-allocated data
- oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
- oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
-
- //! creates a matrix header for a part of the bigger matrix
- oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
- oclMat(const oclMat &m, const Rect &roi);
-
- //! builds oclMat from Mat. Perfom blocking upload to device.
- explicit oclMat (const Mat &m);
-
- //! destructor - calls release()
- ~oclMat();
-
- //! assignment operators
- oclMat &operator = (const oclMat &m);
- //! assignment operator. Perfom blocking upload to device.
- oclMat &operator = (const Mat &m);
- oclMat &operator = (const oclMatExpr& expr);
-
- //! pefroms blocking upload data to oclMat.
- void upload(const cv::Mat &m);
-
-
- //! downloads data from device to host memory. Blocking calls.
- operator Mat() const;
- void download(cv::Mat &m) const;
-
-
- //! returns a new oclMatrix header for the specified row
- oclMat row(int y) const;
- //! returns a new oclMatrix header for the specified column
- oclMat col(int x) const;
- //! ... for the specified row span
- oclMat rowRange(int startrow, int endrow) const;
- oclMat rowRange(const Range &r) const;
- //! ... for the specified column span
- oclMat colRange(int startcol, int endcol) const;
- oclMat colRange(const Range &r) const;
-
- //! returns deep copy of the oclMatrix, i.e. the data is copied
- oclMat clone() const;
- //! copies the oclMatrix content to "m".
- // It calls m.create(this->size(), this->type()).
- // It supports any data type
- void copyTo( oclMat &m ) const;
- //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
- //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- void copyTo( oclMat &m, const oclMat &mask ) const;
- //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale.
- //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
-
- void assignTo( oclMat &m, int type = -1 ) const;
-
- //! sets every oclMatrix element to s
- //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- oclMat& operator = (const Scalar &s);
- //! sets some of the oclMatrix elements to s, according to the mask
- //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
- //! creates alternative oclMatrix header for the same data, with different
- // number of channels and/or different number of rows. see cvReshape.
- oclMat reshape(int cn, int rows = 0) const;
-
- //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
- // previous data is unreferenced if needed.
- void create(int rows, int cols, int type);
- void create(Size size, int type);
-
- //! allocates new oclMatrix with specified device memory type.
- void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
- void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
-
- //! decreases reference counter;
- // deallocate the data when reference counter reaches 0.
- void release();
-
- //! swaps with other smart pointer
- void swap(oclMat &mat);
-
- //! locates oclMatrix header within a parent oclMatrix. See below
- void locateROI( Size &wholeSize, Point &ofs ) const;
- //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
- oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
- //! extracts a rectangular sub-oclMatrix
- // (this is a generalized form of row, rowRange etc.)
- oclMat operator()( Range rowRange, Range colRange ) const;
- oclMat operator()( const Rect &roi ) const;
-
- oclMat& operator+=( const oclMat& m );
- oclMat& operator-=( const oclMat& m );
- oclMat& operator*=( const oclMat& m );
- oclMat& operator/=( const oclMat& m );
-
- //! returns true if the oclMatrix data is continuous
- // (i.e. when there are no gaps between successive rows).
- // similar to CV_IS_oclMat_CONT(cvoclMat->type)
- bool isContinuous() const;
- //! returns element size in bytes,
- // similar to CV_ELEM_SIZE(cvMat->type)
- size_t elemSize() const;
- //! returns the size of element channel in bytes.
- size_t elemSize1() const;
- //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
- int type() const;
- //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
- //! 3 channels element actually use 4 channel space
- int ocltype() const;
- //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
- int depth() const;
- //! returns element type, similar to CV_MAT_CN(cvMat->type)
- int channels() const;
- //! returns element type, return 4 for 3 channels element,
- //!becuase 3 channels element actually use 4 channel space
- int oclchannels() const;
- //! returns step/elemSize1()
- size_t step1() const;
- //! returns oclMatrix size:
- // width == number of columns, height == number of rows
- Size size() const;
- //! returns true if oclMatrix data is NULL
- bool empty() const;
-
- //! returns pointer to y-th row
- uchar* ptr(int y = 0);
- const uchar *ptr(int y = 0) const;
-
- //! template version of the above method
- template<typename _Tp> _Tp *ptr(int y = 0);
- template<typename _Tp> const _Tp *ptr(int y = 0) const;
-
- //! matrix transposition
- oclMat t() const;
-
- /*! includes several bit-fields:
- - the magic signature
- - continuity flag
- - depth
- - number of channels
- */
- int flags;
- //! the number of rows and columns
- int rows, cols;
- //! a distance between successive rows in bytes; includes the gap if any
- size_t step;
- //! pointer to the data(OCL memory object)
- uchar *data;
-
- //! pointer to the reference counter;
- // when oclMatrix points to user-allocated data, the pointer is NULL
- int *refcount;
-
- //! helper fields used in locateROI and adjustROI
- //datastart and dataend are not used in current version
- uchar *datastart;
- uchar *dataend;
-
- //! OpenCL context associated with the oclMat object.
- Context *clCxt;
- //add offset for handle ROI, calculated in byte
- int offset;
- //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
- int wholerows;
- int wholecols;
- };
-
-
- ///////////////////// mat split and merge /////////////////////////////////
- //! Compose a multi-channel array from several single-channel arrays
- // Support all types
- CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
- CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
-
- //! Divides multi-channel array into several single-channel arrays
- // Support all types
- CV_EXPORTS void split(const oclMat &src, oclMat *dst);
- CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
-
- ////////////////////////////// Arithmetics ///////////////////////////////////
- //#if defined DOUBLE_SUPPORT
- //typedef double F;
- //#else
- //typedef float F;
- //#endif
- // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c);
- CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c);
- //! adds one matrix to another (c = a + b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
- //! adds one matrix to another (c = a + b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
- //! adds scalar to a matrix (c = a + s)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
- //! subtracts one matrix from another (c = a - b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
- //! subtracts one matrix from another (c = a - b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
- //! subtracts scalar from a matrix (c = a - s)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
- //! subtracts scalar from a matrix (c = a - s)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
- //! computes element-wise product of the two arrays (c = a * b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
- //! multiplies matrix to a number (dst = scalar * src)
- // supports CV_32FC1 only
- CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
- //! computes element-wise quotient of the two arrays (c = a / b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
- //! computes element-wise quotient of the two arrays (c = a / b)
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
-
- //! compares elements of two arrays (c = a <cmpop> b)
- // supports except CV_8SC1,CV_8SC2,CV8SC3,CV_8SC4 types
- CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
-
- //! transposes the matrix
- // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda)
- CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
-
- //! computes element-wise absolute difference of two arrays (c = abs(a - b))
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
- //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
- // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
-
- //! computes mean value and standard deviation of all or selected array elements
- // supports except CV_32F,CV_64F
- CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
-
- //! computes norm of array
- // supports NORM_INF, NORM_L1, NORM_L2
- // supports only CV_8UC1 type
- CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
-
- //! computes norm of the difference between two arrays
- // supports NORM_INF, NORM_L1, NORM_L2
- // supports only CV_8UC1 type
- CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
-
- //! reverses the order of the rows, columns or both in a matrix
- // supports all types
- CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
-
- //! computes sum of array elements
- // disabled until fix crash
- // support all types
- CV_EXPORTS Scalar sum(const oclMat &m);
- CV_EXPORTS Scalar absSum(const oclMat &m);
- CV_EXPORTS Scalar sqrSum(const oclMat &m);
-
- //! finds global minimum and maximum array elements and returns their values
- // support all C1 types
-
- CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
-
- //! finds global minimum and maximum array elements and returns their values with locations
- // support all C1 types
-
- CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
- const oclMat &mask = oclMat());
-
- //! counts non-zero array elements
- // support all types
- CV_EXPORTS int countNonZero(const oclMat &src);
-
- //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
- // destination array will have the depth type as lut and the same channels number as source
- //It supports 8UC1 8UC4 only
- CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
-
- //! only 8UC1 and 256 bins is supported now
- CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
- //! only 8UC1 and 256 bins is supported now
- CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
- //! bilateralFilter
- // supports 8UC1 8UC4
- CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
- //! computes exponent of each matrix element (b = e**a)
- // supports only CV_32FC1 type
- CV_EXPORTS void exp(const oclMat &a, oclMat &b);
-
- //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
- // supports only CV_32FC1 type
- CV_EXPORTS void log(const oclMat &a, oclMat &b);
-
- //! computes magnitude of each (x(i), y(i)) vector
- // supports only CV_32F CV_64F type
- CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
- CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
-
- CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
-
- //! computes angle (angle(i)) of each (x(i), y(i)) vector
- // supports only CV_32F CV_64F type
- CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
-
- //! the function raises every element of tne input array to p
- //! support only CV_32F CV_64F type
- CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
-
- //! converts Cartesian coordinates to polar
- // supports only CV_32F CV_64F type
- CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
-
- //! converts polar coordinates to Cartesian
- // supports only CV_32F CV_64F type
- CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
-
- //! perfroms per-elements bit-wise inversion
- // supports all types
- CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
- //! calculates per-element bit-wise disjunction of two arrays
- // supports all types
- CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- //! calculates per-element bit-wise conjunction of two arrays
- // supports all types
- CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- //! calculates per-element bit-wise "exclusive or" operation
- // supports all types
- CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
-
- //! Logical operators
- CV_EXPORTS oclMat operator ~ (const oclMat &);
- CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
- CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
- CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
-
-
- //! Mathematics operators
- CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
- CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
- CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
- CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
-
- //! computes convolution of two images
- //! support only CV_32FC1 type
- CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
-
- CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
-
- //////////////////////////////// Filter Engine ////////////////////////////////
-
- /*!
- The Base Class for 1D or Row-wise Filters
-
- This is the base class for linear or non-linear filters that process 1D data.
- In particular, such filters are used for the "horizontal" filtering parts in separable filters.
-
- The constructor only stores ksize_, anchor_ and bordertype_ in the public members of the
- same name; operator() is pure virtual, so concrete filters supply the src -> dst processing.
- */
- class CV_EXPORTS BaseRowFilter_GPU
- {
- public:
- BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- virtual ~BaseRowFilter_GPU() {} // virtual so a derived filter can be destroyed through a base pointer
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- int ksize, anchor, bordertype; // copies of the constructor arguments
- };
-
- /*!
- The Base Class for Column-wise Filters
-
- This is the base class for linear or non-linear filters that process columns of 2D arrays.
- Such filters are used for the "vertical" filtering parts in separable filters.
-
- The constructor only records ksize_, anchor_ and bordertype_ in the public members of the
- same name; operator() is pure virtual and must be supplied by the concrete filter.
- */
- class CV_EXPORTS BaseColumnFilter_GPU
- {
- public:
- BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- virtual ~BaseColumnFilter_GPU() {} // virtual so a derived filter can be destroyed through a base pointer
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- int ksize, anchor, bordertype; // copies of the constructor arguments
- };
-
- /*!
- The Base Class for Non-Separable 2D Filters.
-
- This is the base class for linear or non-linear 2D filters.
-
- Unlike the row/column bases, the kernel size is a Size and the anchor is a Point.
- The constructor copies its three arguments into the public members ksize, anchor and
- borderType; operator() is pure virtual and is implemented by the concrete filter.
- */
- class CV_EXPORTS BaseFilter_GPU
- {
- public:
- BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
- : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} // arguments are taken by reference but stored by value
- virtual ~BaseFilter_GPU() {}
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- Size ksize; // copy of ksize_
- Point anchor; // copy of anchor_
- int borderType; // copy of borderType_
- };
-
- /*!
- The Base Class for Filter Engine.
-
- The class can be used to apply an arbitrary filtering operation to an image.
- It contains all the necessary intermediate buffers.
-
- This interface itself declares no data members; apply() is pure virtual, so any buffers
- live in the concrete engines returned by the create*Filter_GPU factory functions.
- The roi argument of apply() defaults to Rect(0, 0, -1, -1); presumably this means
- "process the whole image" - TODO confirm against the implementations.
- */
- class CV_EXPORTS FilterEngine_GPU
- {
- public:
- virtual ~FilterEngine_GPU() {}
-
- virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
- };
-
- //! returns the non-separable filter engine with the specified filter
- CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
-
- //! returns the primitive row filter with the specified kernel
- CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
- int anchor = -1, int bordertype = BORDER_DEFAULT);
-
- //! returns the primitive column filter with the specified kernel
- CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
- int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
-
- //! returns the separable linear filter engine
- CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
- const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
- //! returns the separable filter engine with the specified filters
- CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
- const Ptr<BaseColumnFilter_GPU> &columnFilter);
-
- //! returns the Gaussian filter engine
- CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
-
- //! returns filter engine for the generalized Sobel operator
- CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
-
- //! applies Laplacian operator to the image
- // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
- CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
-
- //! returns 2D box filter
- // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
- CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
- const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! returns box filter engine
- CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
- const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! returns 2D filter with the specified kernel
- // supports CV_8UC1 and CV_8UC4 types
- CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! returns the non-separable linear filter engine
- CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
- const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! smooths the image using the normalized box filter
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
- CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! returns 2D morphological filter
- //! only MORPH_ERODE and MORPH_DILATE are supported
- // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- // kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
- CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
- Point anchor = Point(-1, -1));
-
- //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
- CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
- const Point &anchor = Point(-1, -1), int iterations = 1);
-
- //! a synonym for normalized box filter: forwards to boxFilter() with ddepth = -1
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
- static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
- int borderType = BORDER_CONSTANT) // NOTE: this default differs from boxFilter(), which defaults to BORDER_DEFAULT
- {
- boxFilter(src, dst, -1, ksize, anchor, borderType); // ksize, anchor and borderType are passed through unchanged
- }
-
- //! applies non-separable 2D linear filter to the image
- CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
- //! applies separable 2D linear filter to the image
- CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
- Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
- //! applies generalized Sobel operator to the image
- // dst.type must equal src.type
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
- CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
- //! applies the vertical or horizontal Scharr operator to the image
- // dst.type must equal src.type
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
- CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
-
- //! smooths the image using Gaussian filter.
- // dst.type must equal src.type
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
- CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
-
- //! erodes the image (applies the local minimum operator)
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
- int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
- //! dilates the image (applies the local maximum operator)
- // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
- int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
- //! applies an advanced morphological operation to the image
- CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
-
- int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
-
-
- ////////////////////////////// Image processing //////////////////////////////
- //! Does mean shift filtering on GPU.
- CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
- //! Does mean shift procedure on GPU.
- CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
- //! Does mean shift segmentation with elimination of small regions.
- CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
-
- //! applies fixed threshold to the image.
- // supports CV_8UC1 and CV_32FC1 data type
- // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
- CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
-
- //! resizes the image
- // Supports INTER_NEAREST, INTER_LINEAR
- // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
-
- //! Applies a generic geometrical transformation to an image.
-
- // Supports INTER_NEAREST, INTER_LINEAR.
-
- // Map1 supports CV_16SC2, CV_32FC2 types.
-
- // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
-
- CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
-
- //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
- // supports CV_8UC1, CV_8UC4, CV_32SC1 types
- CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
-
- //! Smoothes image using median filter
- // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
- CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
-
- //! warps the image using affine transformation
- // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
- // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
-
- //! warps the image using perspective transformation
- // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
- // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
-
- //! computes the integral image and integral for the squared image
- // sum will have CV_32S type, sqsum - CV_32F type
- // supports only CV_8UC1 source type
- CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
- CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
- CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
- CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
-
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
- class CV_EXPORTS_W OclCascadeClassifier : public cv::CascadeClassifier // extends the CPU classifier with a Haar detection entry point that takes an oclMat
- {
- public:
- OclCascadeClassifier() {};
- ~OclCascadeClassifier() {};
-
- CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor, // presumably the returned sequence is allocated in `storage` - TODO confirm
- int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0)); // both size limits default to (0, 0)
- };
-
- class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier // cascade classifier variant that keeps its working buffers and last-used parameters as member state
- {
- public:
- OclCascadeClassifierBuf() :
- m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {} // NOTE(review): m_rows, m_cols, m_loopcount, m_nodenum and findBiggestObject are not initialized here
-
- ~OclCascadeClassifierBuf() {} // empty: does not call release(); presumably callers must call release() themselves - TODO confirm
-
- void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces, // detections are written to `faces`
- double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
- Size minSize = Size(), Size maxSize = Size());
- void release();
-
- private:
- void Init(const int rows, const int cols, double scaleFactor, int flags,
- const int outputsz, const size_t localThreads[],
- CvSize minSize, CvSize maxSize);
- void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
- void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
- const double scaleFactor, const size_t localThreads[],
- CvSize minSize, CvSize maxSize);
- void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
-
- int m_rows; // not set by the constructor
- int m_cols; // not set by the constructor
- int m_flags;
- int m_loopcount; // not set by the constructor
- int m_nodenum; // not set by the constructor
- bool findBiggestObject; // not set by the constructor
- bool initialized; // false after construction
- double m_scaleFactor;
- Size m_minSize;
- Size m_maxSize;
- vector<CvSize> sizev;
- vector<float> scalev;
- oclMat gimg1, gsum, gsqsum;
- void * buffers; // opaque pointer, NULL after construction
- };
-
-
- /////////////////////////////// Pyramid /////////////////////////////////////
- CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
-
- //! upsamples the source image and then smoothes it
- CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
-
- //! performs linear blending of two images
- //! to avoid accuracy errors sum of weights shouldn't be very close to zero
- // supports only CV_8UC1 source type
- CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
-
- //! computes vertical sum, supports only CV_32FC1 images
- CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
-
- ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
- struct CV_EXPORTS MatchTemplateBuf // caller-owned scratch storage accepted by the matchTemplate() overload that takes a MatchTemplateBuf
- {
- Size user_block_size;
- oclMat imagef, templf; // presumably float copies of the image and template - TODO confirm
- std::vector<oclMat> images;
- std::vector<oclMat> image_sums;
- std::vector<oclMat> image_sqsums;
- };
-
- //! computes the proximity map for the raster template and the image where the template is searched for
- // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
- // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
- CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
-
- //! computes the proximity map for the raster template and the image where the template is searched for
- // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
- // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
- CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
-
- ///////////////////////////////////////////// Canny /////////////////////////////////////////////
- struct CV_EXPORTS CannyBuf;
- //! compute edges of the input image using Canny operator
- // Support CV_8UC1 only
- CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
- CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
- struct CV_EXPORTS CannyBuf // working storage accepted by the Canny() overloads that take a CannyBuf
- {
- CannyBuf() : counter(NULL) {}
- ~CannyBuf() // NOTE(review): the destructor releases, but no copy constructor/assignment is declared here - confirm that copying a CannyBuf is safe
- {
- release();
- }
- explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL) // counter starts as NULL, then the work is delegated to create()
- {
- create(image_size, apperture_size);
- }
- CannyBuf(const oclMat &dx_, const oclMat &dy_); // defined out of line
-
- void create(const Size &image_size, int apperture_size = 3);
- void release();
- oclMat dx, dy;
- oclMat dx_buf, dy_buf;
- oclMat edgeBuf;
- oclMat trackBuf1, trackBuf2;
- void *counter; // opaque pointer, NULL-initialized by the inline constructors
- Ptr<FilterEngine_GPU> filterDX, filterDY;
- };
-
- ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
- //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
- //! Param dft_size is the size of DFT transform.
- //!
- //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
- // support src type of CV_32FC1, CV_32FC2
- // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
- // dft_size is the size of original input, which is used for transformation from complex to real.
- // dft_size must be powers of 2, 3 and 5
- // real to complex dft requires at least v1.8 clAmdFft
- // real to complex dft output is not the same with cpu version
- // real to complex and complex to real does not support DFT_ROWS
- CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
-
- //! implements generalized matrix product algorithm GEMM from BLAS
- // The functionality requires clAmdBlas library
- // only support type CV_32FC1
- // flag GEMM_3_T is not supported
- CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
- const oclMat &src3, double beta, oclMat &dst, int flags = 0);
-
- //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
- struct CV_EXPORTS HOGDescriptor // HOG descriptor and detector operating on oclMat images
- {
- enum { DEFAULT_WIN_SIGMA = -1 }; // used as the default for win_sigma; presumably a "compute it automatically" sentinel - TODO confirm
- enum { DEFAULT_NLEVELS = 64 }; // used as the default for nlevels
- enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; // layout choices for getDescriptors()
- HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
- Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
- int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
- double threshold_L2hys = 0.2, bool gamma_correction = true,
- int nlevels = DEFAULT_NLEVELS);
-
- size_t getDescriptorSize() const;
- size_t getBlockHistogramSize() const;
- void setSVMDetector(const vector<float> &detector);
- static vector<float> getDefaultPeopleDetector();
- static vector<float> getPeopleDetector48x96();
- static vector<float> getPeopleDetector64x128();
- void detect(const oclMat &img, vector<Point> &found_locations, // results are returned as points
- double hit_threshold = 0, Size win_stride = Size(),
- Size padding = Size());
- void detectMultiScale(const oclMat &img, vector<Rect> &found_locations, // results are returned as rectangles
- double hit_threshold = 0, Size win_stride = Size(),
- Size padding = Size(), double scale0 = 1.05,
- int group_threshold = 2);
- void getDescriptors(const oclMat &img, Size win_stride,
- oclMat &descriptors,
- int descr_format = DESCR_FORMAT_COL_BY_COL);
- Size win_size;
- Size block_size;
- Size block_stride;
- Size cell_size;
-
- int nbins;
- double win_sigma;
- double threshold_L2hys;
- bool gamma_correction;
- int nlevels;
-
- protected:
- // initialize buffers; only needs to be done once in case of multiscale detection
- void init_buffer(const oclMat &img, Size win_stride);
- void computeBlockHistograms(const oclMat &img);
- void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
- double getWinSigma() const;
- bool checkDetectorSize() const;
-
- static int numPartsWithin(int size, int part_size, int stride);
- static Size numPartsWithin(Size size, Size part_size, Size stride);
-
- // Coefficients of the separating plane
- float free_coef;
- oclMat detector;
- // Results of the last classification step
- oclMat labels;
- Mat labels_host;
- // Results of the last histogram evaluation step
- oclMat block_hists;
- // Gradients computation results
- oclMat grad, qangle;
- // scaled image
- oclMat image_scale;
- // effective size of input image (might be different from original size after scaling)
- Size effect_size;
- };
-
-
- ////////////////////////feature2d_ocl/////////////////
- /****************************************************************************************\
- * Distance *
- \****************************************************************************************/
- template<typename T>
- struct CV_EXPORTS Accumulator // primary template: the accumulator type is T itself
- {
- typedef T Type;
- };
- template<> struct Accumulator<unsigned char> // unsigned char values accumulate in float
- {
- typedef float Type;
- };
- template<> struct Accumulator<unsigned short> // unsigned short values accumulate in float
- {
- typedef float Type;
- };
- template<> struct Accumulator<char> // char values accumulate in float
- {
- typedef float Type;
- };
- template<> struct Accumulator<short> // short values accumulate in float
- {
- typedef float Type;
- };
-
- /*
- * Manhattan distance (city block distance) functor
- *
- * ValueType is T and ResultType is Accumulator<T>::Type.
- * operator() forwards a, b and size to normL1<ValueType, ResultType>.
- */
- template<class T>
- struct CV_EXPORTS L1
- {
- enum { normType = NORM_L1 };
- typedef T ValueType;
- typedef typename Accumulator<T>::Type ResultType;
-
- ResultType operator()( const T *a, const T *b, int size ) const
- {
- return normL1<ValueType, ResultType>(a, b, size);
- }
- };
-
- /*
- * Euclidean distance functor
- *
- * ValueType is T and ResultType is Accumulator<T>::Type.
- * operator() takes normL2Sqr<ValueType, ResultType>(a, b, size), converts it to double,
- * applies sqrt and casts the result back to ResultType.
- */
- template<class T>
- struct CV_EXPORTS L2
- {
- enum { normType = NORM_L2 };
- typedef T ValueType;
- typedef typename Accumulator<T>::Type ResultType;
-
- ResultType operator()( const T *a, const T *b, int size ) const
- {
- return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
- }
- };
-
- /*
- * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
- * bit count of A XOR'ed with B
- *
- * ValueType is unsigned char and ResultType is int.
- */
- struct CV_EXPORTS Hamming
- {
- enum { normType = NORM_HAMMING };
- typedef unsigned char ValueType;
- typedef int ResultType;
-
- /** this will count the bits in a ^ b
- * by forwarding a, b and size to normHamming
- */
- ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
- {
- return normHamming(a, b, size);
- }
- };
-
- ////////////////////////////////// BruteForceMatcher //////////////////////////////////
-
- class CV_EXPORTS BruteForceMatcher_OCL_base // non-template base: holds the train descriptor collection and declares the match / knnMatch / radiusMatch API
- {
- public:
- enum DistType {L1Dist = 0, L2Dist, HammingDist};
- explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
- // Add descriptors to the train descriptor collection
- void add(const std::vector<oclMat> &descCollection);
- // Get the train descriptor collection
- const std::vector<oclMat> &getTrainDescriptors() const;
- // Clear the train descriptor collection
- void clear();
- // Return true if there are no train descriptors in the collection
- bool empty() const;
-
- // Return true if the matcher supports mask in match methods
- bool isMaskSupported() const;
-
- // Find one best match for each query descriptor
- void matchSingle(const oclMat &query, const oclMat &train,
- oclMat &trainIdx, oclMat &distance,
- const oclMat &mask = oclMat());
-
- // Download trainIdx and distance and convert them to a CPU vector of DMatch
- static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
- // Convert trainIdx and distance to a vector of DMatch
- static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
-
- // Find one best match for each query descriptor
- void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
-
- // Make gpu collection of trains and masks in suitable format for matchCollection function
- void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-
- // Find one best match from train collection for each query descriptor
- void matchCollection(const oclMat &query, const oclMat &trainCollection,
- oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- const oclMat &masks = oclMat());
-
- // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
- static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
- // Convert trainIdx, imgIdx and distance to a vector of DMatch
- static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
-
- // Find one best match from train collection for each query descriptor.
- void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
- // Find k best matches for each query descriptor (in increasing order of distances)
- void knnMatchSingle(const oclMat &query, const oclMat &train,
- oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
- const oclMat &mask = oclMat());
-
- // Download trainIdx and distance and convert them to a vector of DMatch
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
- // Convert trainIdx and distance to a vector of DMatch
- static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
- // Find k best matches for each query descriptor (in increasing order of distances).
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- void knnMatch(const oclMat &query, const oclMat &train,
- std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
- bool compactResult = false);
-
- // Find k best matches from train collection for each query descriptor (in increasing order of distances)
- void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
- oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- const oclMat &maskCollection = oclMat());
-
- // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
- // Convert trainIdx, imgIdx and distance to a vector of DMatch
- static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
- // Find k best matches for each query descriptor (in increasing order of distances).
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
- const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
- // Find best matches for each query descriptor which have distance less than maxDistance.
- // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
- // Careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
- // because it didn't have enough memory.
- // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
- // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
- // Matches are not sorted.
- void radiusMatchSingle(const oclMat &query, const oclMat &train,
- oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- const oclMat &mask = oclMat());
-
- // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
- // matches will be sorted in increasing order of distances.
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- // Convert trainIdx, nMatches and distance to a vector of DMatch.
- static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- // Find best matches for each query descriptor which have distance less than maxDistance
- // (in increasing order of distances).
- void radiusMatch(const oclMat &query, const oclMat &train,
- std::vector< std::vector<DMatch> > &matches, float maxDistance,
- const oclMat &mask = oclMat(), bool compactResult = false);
- // Find best matches for each query descriptor which have distance less than maxDistance.
- // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
- // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
- // Matches are not sorted.
- void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- const std::vector<oclMat> &masks = std::vector<oclMat>());
- // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
- // matches will be sorted in increasing order of distances.
- // compactResult is used when mask is not empty. If compactResult is false matches
- // vector will have the same size as queryDescriptors rows. If compactResult is true
- // matches vector will not contain matches for fully masked out query descriptors.
- static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- // Convert trainIdx, imgIdx, nMatches and distance to a vector of DMatch.
- static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
- std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- // Find best matches from train collection for each query descriptor which have distance less than
- // maxDistance (in increasing order of distances).
- void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
- const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
- DistType distType; // NOTE(review): public data member; presumably set from the constructor argument - confirm
- private:
- std::vector<oclMat> trainDescCollection; // train descriptor storage behind add() / getTrainDescriptors() / clear() / empty()
- };
-
- template <class Distance>
- class CV_EXPORTS BruteForceMatcher_OCL;
-
- template <typename T>
- class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
- {
- public:
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
- explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
- };
-
- template <typename T>
- class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
- {
- public:
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
- explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
- };
-
- template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
- {
- public:
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
- explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
- };
-
- class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
- {
- public:
- explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
- };
- /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
- class CV_EXPORTS PyrLKOpticalFlow
- {
- public:
- PyrLKOpticalFlow()
- {
- winSize = Size(21, 21);
- maxLevel = 3;
- iters = 30;
- derivLambda = 0.5;
- useInitialFlow = false;
- minEigThreshold = 1e-4f;
- getMinEigenVals = false;
- isDeviceArch11_ = false;
- }
-
- void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
- oclMat &status, oclMat *err = 0);
- void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
- Size winSize;
- int maxLevel;
- int iters;
- double derivLambda;
- bool useInitialFlow;
- float minEigThreshold;
- bool getMinEigenVals;
- void releaseMemory()
- {
- dx_calcBuf_.release();
- dy_calcBuf_.release();
-
- prevPyr_.clear();
- nextPyr_.clear();
-
- dx_buf_.release();
- dy_buf_.release();
- }
- private:
- void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
- void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
-
- oclMat dx_calcBuf_;
- oclMat dy_calcBuf_;
-
- vector<oclMat> prevPyr_;
- vector<oclMat> nextPyr_;
-
- oclMat dx_buf_;
- oclMat dy_buf_;
- oclMat uPyr_[2];
- oclMat vPyr_[2];
- bool isDeviceArch11_;
- };
- //////////////// build warping maps ////////////////////
- //! builds plane warping maps
- CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
- //! builds cylindrical warping maps
- CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
- //! builds spherical warping maps
- CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
- //! builds Affine warping maps
- CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
-
- //! builds Perspective warping maps
- CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
-
- ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
- //! Interpolate frames (images) using provided optical flow (displacement field).
- //! frame0 - frame 0 (32-bit floating point images, single channel)
- //! frame1 - frame 1 (the same type and size)
- //! fu - forward horizontal displacement
- //! fv - forward vertical displacement
- //! bu - backward horizontal displacement
- //! bv - backward vertical displacement
- //! pos - new frame position
- //! newFrame - new frame
- //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
- //! occlusion masks 0, occlusion masks 1,
- //! interpolated forward flow 0, interpolated forward flow 1,
- //! interpolated backward flow 0, interpolated backward flow 1
- //!
- CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
- const oclMat &fu, const oclMat &fv,
- const oclMat &bu, const oclMat &bv,
- float pos, oclMat &newFrame, oclMat &buf);
-
- //! computes moments of the rasterized shape or a vector of points
- CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
-
- class CV_EXPORTS StereoBM_OCL
- {
- public:
- enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
-
- enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
-
- //! the default constructor
- StereoBM_OCL();
- //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
- StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
-
- //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
- //! Output disparity has CV_8U type.
- void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
-
- //! Some heuristics that tries to estmate
- // if current GPU will be faster then CPU in this algorithm.
- // It queries current active device.
- static bool checkIfGpuCallReasonable();
-
- int preset;
- int ndisp;
- int winSize;
-
- // If avergeTexThreshold == 0 => post procesing is disabled
- // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
- // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
- // i.e. input left image is low textured.
- float avergeTexThreshold;
- private:
- oclMat minSSD, leBuf, riBuf;
- };
-
- class CV_EXPORTS StereoBeliefPropagation
- {
- public:
- enum { DEFAULT_NDISP = 64 };
- enum { DEFAULT_ITERS = 5 };
- enum { DEFAULT_LEVELS = 5 };
- static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
- explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
- int iters = DEFAULT_ITERS,
- int levels = DEFAULT_LEVELS,
- int msg_type = CV_16S);
- StereoBeliefPropagation(int ndisp, int iters, int levels,
- float max_data_term, float data_weight,
- float max_disc_term, float disc_single_jump,
- int msg_type = CV_32F);
- void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
- void operator()(const oclMat &data, oclMat &disparity);
- int ndisp;
- int iters;
- int levels;
- float max_data_term;
- float data_weight;
- float max_disc_term;
- float disc_single_jump;
- int msg_type;
- private:
- oclMat u, d, l, r, u2, d2, l2, r2;
- std::vector<oclMat> datas;
- oclMat out;
- };
-
- class CV_EXPORTS StereoConstantSpaceBP
- {
- public:
- enum { DEFAULT_NDISP = 128 };
- enum { DEFAULT_ITERS = 8 };
- enum { DEFAULT_LEVELS = 4 };
- enum { DEFAULT_NR_PLANE = 4 };
- static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
- explicit StereoConstantSpaceBP(
- int ndisp = DEFAULT_NDISP,
- int iters = DEFAULT_ITERS,
- int levels = DEFAULT_LEVELS,
- int nr_plane = DEFAULT_NR_PLANE,
- int msg_type = CV_32F);
- StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
- float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
- int min_disp_th = 0,
- int msg_type = CV_32F);
- void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
- int ndisp;
- int iters;
- int levels;
- int nr_plane;
- float max_data_term;
- float data_weight;
- float max_disc_term;
- float disc_single_jump;
- int min_disp_th;
- int msg_type;
- bool use_local_init_data_cost;
- private:
- oclMat u[2], d[2], l[2], r[2];
- oclMat disp_selected_pyr[2];
- oclMat data_cost;
- oclMat data_cost_selected;
- oclMat temp;
- oclMat out;
- };
-
- // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
- //
- // see reference:
- // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
- // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
- class CV_EXPORTS OpticalFlowDual_TVL1_OCL
- {
- public:
- OpticalFlowDual_TVL1_OCL();
-
- void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
-
- void collectGarbage();
-
- /**
- * Time step of the numerical scheme.
- */
- double tau;
-
- /**
- * Weight parameter for the data term, attachment parameter.
- * This is the most relevant parameter, which determines the smoothness of the output.
- * The smaller this parameter is, the smoother the solutions we obtain.
- * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
- */
- double lambda;
-
- /**
- * Weight parameter for (u - v)^2, tightness parameter.
- * It serves as a link between the attachment and the regularization terms.
- * In theory, it should have a small value in order to maintain both parts in correspondence.
- * The method is stable for a large range of values of this parameter.
- */
- double theta;
-
- /**
- * Number of scales used to create the pyramid of images.
- */
- int nscales;
-
- /**
- * Number of warpings per scale.
- * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
- * This is a parameter that assures the stability of the method.
- * It also affects the running time, so it is a compromise between speed and accuracy.
- */
- int warps;
-
- /**
- * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
- * A small value will yield more accurate solutions at the expense of a slower convergence.
- */
- double epsilon;
-
- /**
- * Stopping criterion iterations number used in the numerical scheme.
- */
- int iterations;
-
- bool useInitialFlow;
-
- private:
- void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
-
- std::vector<oclMat> I0s;
- std::vector<oclMat> I1s;
- std::vector<oclMat> u1s;
- std::vector<oclMat> u2s;
-
- oclMat I1x_buf;
- oclMat I1y_buf;
-
- oclMat I1w_buf;
- oclMat I1wx_buf;
- oclMat I1wy_buf;
-
- oclMat grad_buf;
- oclMat rho_c_buf;
-
- oclMat p11_buf;
- oclMat p12_buf;
- oclMat p21_buf;
- oclMat p22_buf;
-
- oclMat diff_buf;
- oclMat norm_buf;
- };
- }
-}
-#if defined _MSC_VER && _MSC_VER >= 1200
-# pragma warning( push)
-# pragma warning( disable: 4267)
-#endif
-#include "opencv2/ocl/matrix_operations.hpp"
-#if defined _MSC_VER && _MSC_VER >= 1200
-# pragma warning( pop)
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
#endif
- #include "opencv2/ocl.hpp"
-#endif /* __OPENCV_GPU_HPP__ */
++#include "opencv2/ocl.hpp"
/////////////////////// add subtract multiply divide /////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename T>
--void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- string kernelName, const char **kernelString, void *_scalar, int op_type = 0)
++void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ String kernelName, const char **kernelString, void *_scalar, int op_type = 0)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
}
--static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- string kernelName, const char **kernelString, int op_type = 0)
++static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ String kernelName, const char **kernelString, int op_type = 0)
{
arithmetic_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, op_type);
}
--static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask,
- string kernelName, const char **kernelString, int op_type = 0)
++static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask,
+ String kernelName, const char **kernelString, int op_type = 0)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
}
void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, const oclMat &mask)
{
- string kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
+ String kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
+
arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1);
}
- string kernelName = "arithm_muls";
+ void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst)
+ {
++ String kernelName = "arithm_muls";
+ arithmetic_scalar_run( src, dst, kernelName, &arithm_mul, scalar);
+ }
void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
{
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
template<typename T>
- void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const char **kernelString, void *_scalar)
-void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName,
++void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName,
+ const char **kernelString, void *_scalar, const char* _opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
{
double scalar1 = *((double *)_scalar);
T scalar = (T)scalar1;
- args.push_back( make_pair( sizeof(T), (void *)&scalar ));
+ args.push_back( std::make_pair( sizeof(T), (void *)&scalar ));
}
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, _opt);
}
- static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const char **kernelString)
+ static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- string kernelName, const char **kernelString, const char* _opt = NULL)
++ String kernelName, const char **kernelString, const char* _opt = NULL)
{
- bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL);
+ bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, _opt);
}
- static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString)
+ static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- const oclMat &mask, string kernelName, const char **kernelString, const char* _opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, const char* _opt = NULL)
{
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
};
int dst_step1 = dst.cols * dst.elemSize();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mask.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&mask.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, _opt);
}
template <typename WT , typename CL_WT>
- void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar)
+ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
- const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
{
dst.create(src1.size(), src1.type());
if(isMatSubScalar != 0)
{
isMatSubScalar = isMatSubScalar > 0 ? 1 : 0;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
}
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, opt);
}
- typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar);
+ typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst,
- const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
- static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar)
+ static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
- const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
{
static BitwiseFuncS tab[8] =
{
};
BitwiseFuncS func = tab[src1.depth()];
if(func == 0)
- cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__);
+ cv::error(Error::StsBadArg, "Unsupported arithmetic operation", "", __FILE__, __LINE__);
- func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar);
+ func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar, opt);
}
- static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString)
+ static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
- const oclMat &mask, string kernelName, const char **kernelString, const char * opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, const char * opt = NULL)
{
- bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0, opt);
}
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
// dst.create(src1.size(),src1.type());
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- oclMat emptyMat;
- String kernelName = mask.empty() ? "arithm_bitwise_or" : "arithm_bitwise_or_with_mask";
+
- string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++ String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=|";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_or);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_or_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- String kernelName = mask.data ? "arithm_s_bitwise_or_with_mask" : "arithm_s_bitwise_or";
+ static const char opt [] = "-D OP_BINARY=|";
- string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++ String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar_mask);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
}
oclMat emptyMat;
- String kernelName = mask.empty() ? "arithm_bitwise_and" : "arithm_bitwise_and_with_mask";
- string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++ String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=&";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_and);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- String kernelName = mask.data ? "arithm_s_bitwise_and_with_mask" : "arithm_s_bitwise_and";
+ static const char opt [] = "-D OP_BINARY=&";
- string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++ String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
if (mask.data)
- bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar_mask);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar);
+ bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- oclMat emptyMat;
- String kernelName = mask.empty() ? "arithm_bitwise_xor" : "arithm_bitwise_xor_with_mask";
- string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++ String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+ static const char opt [] = "-D OP_BINARY=^";
if (mask.empty())
- bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_xor);
+ bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
else
- bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_mask);
+ bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
}
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- String kernelName = mask.data ? "arithm_s_bitwise_xor_with_mask" : "arithm_s_bitwise_xor";
- string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++ String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
+ static const char opt [] = "-D OP_BINARY=^";
if (mask.data)
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar_mask);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
else
- bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
+ bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
}
oclMat cv::ocl::operator ~ (const oclMat &src)
if( CV_MAT_CN(gimg.type()) > 1 )
{
- cvtColor( gimg, gtemp, CV_BGR2GRAY );
+ oclMat gtemp;
+ cvtColor( gimg, gtemp, COLOR_BGR2GRAY );
gimg = gtemp;
}
int totalheight = 0;
int indexy = 0;
CvSize sz;
- //t = (double)cvGetTickCount();
- vector<CvSize> sizev;
- vector<float> scalev;
+ std::vector<CvSize> sizev;
+ std::vector<float> scalev;
for(factor = 1.f;; factor *= scaleFactor)
{
- CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
+ CvSize winSize( cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) );
sz.width = cvRound( gimg.cols / factor ) + 1;
sz.height = cvRound( gimg.rows / factor ) + 1;
- CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
+ CvSize sz1( sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 );
if( sz1.width <= 0 || sz1.height <= 0 )
break;
pq.s[3] = gcascade->pq3;
float correction = gcascade->inv_window_area;
- //int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
- //int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
- // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
- //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
-
- vector<pair<size_t, const void *> > args;
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
- args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
- args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
- args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
+ std::vector<std::pair<size_t, const void *> > args;
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&pixelstep ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&p ));
+ args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq ));
+ args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction ));
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
- //t = (double)cvGetTickCount() - t;
- //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
- //t = (double)cvGetTickCount();
- //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
+
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
for(int i = 0; i < outputsz; i++)
int step = gsum.step / 4;
int startnode = 0;
int splitstage = 3;
- int splitnode = stage[0].count + stage[1].count + stage[2].count;
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
- //openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
- //openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
- //openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
- //int argcount = 0;
+ openCLSafeCall(clEnqueueWriteBuffer(qu, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
- vector<pair<size_t, const void *> > args;
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
+ std::vector<std::pair<size_t, const void *> > args;
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
++ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
++ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&step ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
- args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
- //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
- candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
+ candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
for(int i = 0; i < outputsz; i++)
{
else
rweights.resize(rectList.size(), 0);
- }
+ GenResult(faces, rectList, rweights);
+ }
+
+ // Validates the detection parameters, builds the GPU hidden cascade if the host
+ // cascade does not have one yet, (re)creates the device buffers through
+ // CreateBaseBufs / CreateFactorRelatedBufs, and finally records the configuration
+ // in the m_* members and marks the object as initialized.
+ //
+ // rows, cols    - size of the image the buffers are prepared for
+ // scaleFactor   - scale step; must be greater than 1
+ // flags         - CV_HAAR_* bit mask (modified locally when FIND_BIGGEST_OBJECT is set)
+ // outputsz      - forwarded to CreateBaseBufs to size the candidate buffer
+ // localThreads  - forwarded to CreateFactorRelatedBufs
+ // minSize       - smallest window; the image must be at least this large
+ // maxSize       - largest window; a zero dimension is replaced by the image size
+ //
+ // NOTE(review): when the cascade already owns a hid_cascade, datasize and
+ // totalclassifier are passed on as 0 - confirm CreateBaseBufs tolerates that on
+ // a first initialization.
+ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
+                                             double scaleFactor, int flags,
+                                             const int outputsz, const size_t localThreads[],
+                                             CvSize minSize, CvSize maxSize)
+ {
+     CvHaarClassifierCascade *hostCascade = oldCascade;
+
+     // Argument checks, performed in this order.
+     if( !CV_IS_HAAR_CLASSIFIER(hostCascade) )
+     {
+         if( !hostCascade )
+         {
+             CV_Error( CV_StsNullPtr, "Invalid classifier cascade" );
+         }
+         else
+         {
+             CV_Error( CV_StsBadArg, "Invalid classifier cascade" );
+         }
+     }
+
+     if( scaleFactor <= 1 )
+     {
+         CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
+     }
+
+     const bool imageTooSmall = cols < minSize.width || rows < minSize.height;
+     if( imageTooSmall )
+     {
+         CV_Error(CV_StsError, "Image too small");
+     }
+
+     // Only written when the hidden cascade is created here; otherwise both stay 0.
+     int hidDataSize = 0;
+     int classifierTotal = 0;
+     if( !hostCascade->hid_cascade )
+     {
+         gpuCreateHidHaarClassifierCascade(hostCascade, &hidDataSize, &classifierTotal);
+     }
+
+     // A zero in either dimension of maxSize is replaced by the full image size.
+     if( maxSize.height == 0 || maxSize.width == 0 )
+     {
+         maxSize.width = cols;
+         maxSize.height = rows;
+     }
+
+     // FIND_BIGGEST_OBJECT clears the SCALE_IMAGE and DO_CANNY_PRUNING bits.
+     const bool biggestOnly = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
+     findBiggestObject = biggestOnly;
+     if( biggestOnly )
+     {
+         flags = flags & ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
+     }
+
+     CreateBaseBufs(hidDataSize, classifierTotal, flags, outputsz);
+     CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
+
+     // Record the configuration that the buffers were built for.
+     m_rows = rows;
+     m_cols = cols;
+     m_scaleFactor = scaleFactor;
+     m_flags = flags;
+     m_minSize = minSize;
+     m_maxSize = maxSize;
+
+     initialized = true;
+ }
+
+ // Creates the device buffers that do not depend on the scale pyramid.
+ //
+ // On the first call (initialized == false) it allocates the OclBuffers handle
+ // struct, derives m_nodenum from datasize/totalclassifier and creates the stage
+ // and node buffers. On every call it (re)creates the candidate buffer, sized
+ // 4 * sizeof(int) * outputsz; without CV_HAAR_SCALE_IMAGE the buffer is created
+ // with CL_MEM_ALLOC_HOST_PTR as well.
+ //
+ // datasize        - byte size reported by gpuCreateHidHaarClassifierCascade
+ // totalclassifier - classifier count reported by the same call
+ // flags           - CV_HAAR_* bit mask for the upcoming detection
+ // outputsz        - number of 4-int candidate slots to allocate
+ void cv::ocl::OclCascadeClassifierBuf::CreateBaseBufs(const int datasize, const int totalclassifier,
+                                                       const int flags, const int outputsz)
+ {
+     if (!initialized)
+     {
+         buffers = malloc(sizeof(OclBuffers));
+
+         size_t tempSize =
+             sizeof(GpuHidHaarStageClassifier) * ((GpuHidHaarClassifierCascade *)oldCascade->hid_cascade)->count;
+         m_nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - tempSize - sizeof(GpuHidHaarClassifier) * totalclassifier)
+                     / sizeof(GpuHidHaarTreeNode);
+
+         ((OclBuffers *)buffers)->stagebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, tempSize);
+         ((OclBuffers *)buffers)->nodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, m_nodenum * sizeof(GpuHidHaarTreeNode));
+     }
+     else
+     {
+         // The candidate buffer is unconditionally re-created below, so the one
+         // left over from the previous call must always be released first.
+         // Releasing it only when the SCALE_IMAGE bit changed leaked one cl_mem
+         // per call whenever the flag stayed the same.
+         openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
+     }
+
+     if (flags & CV_HAAR_SCALE_IMAGE)
+     {
+         ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
+                                                                       CL_MEM_WRITE_ONLY,
+                                                                       4 * sizeof(int) * outputsz);
+     }
+     else
+     {
+         ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
+                                                                       CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+                                                                       4 * sizeof(int) * outputsz);
+     }
+ }
+
+ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
+ const int rows, const int cols, const int flags,
+ const double scaleFactor, const size_t localThreads[],
+ CvSize minSize, CvSize maxSize)
+ { // (Re)builds everything that depends on the scale list: sizev/scalev, the per-scale info table and its device buffer, plus either gimg1/gsum/gsqsum (SCALE_IMAGE) or pbuffer/correctionbuffer/newnodebuffer (otherwise).
+ if (initialized) // on re-entry, first drop whatever the previous configuration owned and the new one does not reuse
+ {
+ if ((m_flags & CV_HAAR_SCALE_IMAGE) && !(flags & CV_HAAR_SCALE_IMAGE)) // SCALE_IMAGE -> not SCALE_IMAGE: the image/integral mats are released
+ {
+ gimg1.release();
+ gsum.release();
+ gsqsum.release();
- }
++ }
+ else if (!(m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE)) // not SCALE_IMAGE -> SCALE_IMAGE: the three per-scale cl buffers are released
+ {
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+ }
+ else if ((m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE)) // SCALE_IMAGE both times
+ {
+ if (fabs(m_scaleFactor - scaleFactor) < 1e-6
+ && (rows == m_rows && cols == m_cols)
+ && (minSize.width == m_minSize.width)
+ && (minSize.height == m_minSize.height)
+ && (maxSize.width == m_maxSize.width)
+ && (maxSize.height == m_maxSize.height))
+ {
+ return; // same scale factor (within 1e-6), image size and min/max window: nothing to rebuild
+ }
++ }
+ else // neither call uses SCALE_IMAGE
+ {
+ if (fabs(m_scaleFactor - scaleFactor) < 1e-6
+ && (rows == m_rows && cols == m_cols)
+ && (minSize.width == m_minSize.width)
+ && (minSize.height == m_minSize.height)
+ && (maxSize.width == m_maxSize.width)
+ && (maxSize.height == m_maxSize.height))
+ {
+ return; // configuration unchanged: keep the existing buffers
+ }
+ else
+ {
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); // parameters changed: these three are re-created in the non-SCALE_IMAGE branch below
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+ }
+ }
+ }
+
+ int loopcount; // number of scales that will be processed
+ int indexy = 0; // running row offset of each scale's slice inside gsum (SCALE_IMAGE branch only)
+ int totalheight = 0; // sum of the heights of all accepted scaled images
+ double factor;
+ Rect roi;
+ CvSize sz;
+ CvSize winSize0 = oldCascade->orig_window_size;
+ detect_piramid_info *scaleinfo; // host-side table, malloc'ed in either branch and freed after upload
+ if (flags & CV_HAAR_SCALE_IMAGE) // NOTE(review): sizev/scalev are appended to in both branches but never cleared in this function - if they persist across calls, stale entries accumulate; confirm
+ {
+ for(factor = 1.f;; factor *= scaleFactor)
+ {
+ CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
+ sz.width = cvRound( cols / factor ) + 1;
+ sz.height = cvRound( rows / factor ) + 1;
+ CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
+
+ if( sz1.width <= 0 || sz1.height <= 0 ) // scaled image no longer fits one base window: stop
+ break;
+ if( winSize.width > maxSize.width || winSize.height > maxSize.height ) // window grew past maxSize: stop
+ break;
+ if( winSize.width < minSize.width || winSize.height < minSize.height ) // window still below minSize: skip this scale
+ continue;
+
+ totalheight += sz.height;
+ sizev.push_back(sz);
+ scalev.push_back(static_cast<float>(factor));
+ }
+
+ loopcount = sizev.size();
+ gimg1.create(rows, cols, CV_8UC1);
+ gsum.create(totalheight + 4, cols + 1, CV_32SC1);
+ gsqsum.create(totalheight + 4, cols + 1, CV_32FC1);
+
+ scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
+ for( int i = 0; i < loopcount; i++ )
+ {
+ sz = sizev[i];
+ roi = Rect(0, indexy, sz.width, sz.height); // this scale's slice of gsum, stacked vertically
+ int width = sz.width - 1 - oldCascade->orig_window_size.width;
+ int height = sz.height - 1 - oldCascade->orig_window_size.height;
+ int grpnumperline = (width + localThreads[0] - 1) / localThreads[0]; // ceil(width / localThreads[0])
+ int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
+
+ ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height; // width in the bits above 16, height OR-ed into the low bits
+ ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
+ ((detect_piramid_info *)scaleinfo)[i].imgoff = gsum(roi).offset >> 2; // offset divided by 4 - presumably bytes to 32-bit elements; confirm
+ ((detect_piramid_info *)scaleinfo)[i].factor = scalev[i];
+
+ indexy += sz.height;
+ }
+ }
+ else
+ {
+ for(factor = 1;
+ cvRound(factor * winSize0.width) < cols - 10 && cvRound(factor * winSize0.height) < rows - 10;
+ factor *= scaleFactor)
+ {
+ CvSize winSize = { cvRound( winSize0.width * factor ), cvRound( winSize0.height * factor ) };
+ if( winSize.width < minSize.width || winSize.height < minSize.height )
+ {
+ continue;
+ }
+ sizev.push_back(winSize);
+ scalev.push_back(factor);
+ }
+
+ loopcount = scalev.size();
+ if(loopcount == 0) // no scale qualified: fall back to a single entry using minSize as the window
+ {
+ loopcount = 1;
+ sizev.push_back(minSize);
+ scalev.push_back( min(cvRound(minSize.width / winSize0.width), cvRound(minSize.height / winSize0.height)) );
+ }
+
+ ((OclBuffers *)buffers)->pbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
+ sizeof(cl_int4) * loopcount);
+ ((OclBuffers *)buffers)->correctionbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
+ sizeof(cl_float) * loopcount);
+ ((OclBuffers *)buffers)->newnodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_WRITE,
+ loopcount * m_nodenum * sizeof(GpuHidHaarTreeNode));
+
+ scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
+ for( int i = 0; i < loopcount; i++ )
+ {
+ sz = sizev[i];
+ factor = scalev[i];
+ int ystep = cvRound(std::max(2., factor)); // step is at least 2 and grows with the scale factor
+ int width = (cols - 1 - sz.width + ystep - 1) / ystep;
+ int height = (rows - 1 - sz.height + ystep - 1) / ystep;
+ int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
+ int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
+
+ ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
+ ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
+ ((detect_piramid_info *)scaleinfo)[i].imgoff = 0; // no per-scale image offset is stored in this branch
+ ((detect_piramid_info *)scaleinfo)[i].factor = factor;
+ }
+ }
+
+ if (loopcount != m_loopcount) // NOTE(review): on the first call this depends on m_loopcount's initial value differing from loopcount - confirm it is initialized elsewhere
+ {
+ if (initialized)
+ {
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
+ }
+ ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
+ }
+
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0, // third argument 1 = blocking write
+ sizeof(detect_piramid_info)*loopcount,
+ scaleinfo, 0, NULL, NULL));
+ free(scaleinfo); // the host table is no longer needed once the blocking write has returned
+
+ m_loopcount = loopcount; // compared against on the next call to decide whether scaleinfobuffer must be re-created
+ }
+
+ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& faces,
+ const std::vector<cv::Rect> &rectList,
+ const std::vector<int> &rweights)
+ {
+ CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
if( findBiggestObject && rectList.size() )
{
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
oclMat t_sum , t_sqsum;
- t_sum.create(src.cols, src.rows, CV_32SC1);
- t_sqsum.create(src.cols, src.rows, CV_32FC1);
-
int w = src.cols + 1, h = src.rows + 1;
- sum.create(h, w, CV_32SC1);
+ int depth;
+ if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255
+ {
+ t_sum.create(src.cols, src.rows, CV_32SC1);
+ sum.create(h, w, CV_32SC1);
+ }
+ else
+ {
+ //Use float to prevent overflow
+ t_sum.create(src.cols, src.rows, CV_32FC1);
+ sum.create(h, w, CV_32FC1);
- }
- t_sqsum.create(src.cols, src.rows, CV_32FC1);
- sqsum.create(h, w, CV_32FC1);
- depth = sum.depth();
- int sum_offset = sum.offset / vlen;
- int sqsum_offset = sqsum.offset / vlen;
-
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
++ }
++ t_sqsum.create(src.cols, src.rows, CV_32FC1);
+ sqsum.create(h, w, CV_32FC1);
- int sum_offset = sum.offset / vlen, sqsum_offset = sqsum.offset / vlen;
++ depth = sum.depth();
++ int sum_offset = sum.offset / vlen;
++ int sqsum_offset = sqsum.offset / vlen;
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, -1);
++ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
+ args.clear();
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum.step));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, -1);
- //std::cout << "tested" << std::endl;
++ openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
}
+
void integral(const oclMat &src, oclMat &sum)
{
CV_Assert(src.type() == CV_8UC1);
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
oclMat t_sum;
- t_sum.create(src.cols, src.rows, CV_32SC1);
-
int w = src.cols + 1, h = src.rows + 1;
- sum.create(h, w, CV_32SC1);
+ int depth;
+ if(src.cols * src.rows <= 2901 * 2901)
+ {
+ t_sum.create(src.cols, src.rows, CV_32SC1);
+ sum.create(h, w, CV_32SC1);
+ }else
+ {
+ t_sum.create(src.cols, src.rows, CV_32FC1);
+ sum.create(h, w, CV_32FC1);
- }
- depth = sum.depth();
- int sum_offset = sum.offset / vlen;
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
- args.clear();
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
++ }
++ depth = sum.depth();
+ int sum_offset = sum.offset / vlen;
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, -1);
++ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
+ args.clear();
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1};
- openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, -1);
++ openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
+ //std::cout << "tested" << std::endl;
}
/////////////////////// corner //////////////////////////////
{
cl_platform_id oclplatform;
std::vector<cl_device_id> devices;
- std::vector<std::string> devName;
- std::string clVersion;
+ std::vector<String> devName;
+ String platName;
++ String clVersion;
+
cl_context oclcontext;
cl_command_queue clCmdQueue;
int devnum;
int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
{
- if( (mem_type == DEVICE_MEM_PM &&
- if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
- mem_type == DEVICE_MEM_UHP ||
- mem_type == DEVICE_MEM_CHP )
++ if( (mem_type == DEVICE_MEM_PM &&
+ Context::getContext()->impl->unified_memory == 0) )
return -1;
gDeviceMemRW = rw_type;
gDeviceMemType = mem_type;
std::vector<cl_platform_id> platforms(numPlatforms);
openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0));
- char deviceName[256];
int devcienums = 0;
+
+ const static int max_name_length = 256;
+ char deviceName[max_name_length];
+ char plfmName[max_name_length];
+ char clVersion[256];
for (unsigned i = 0; i < numPlatforms; ++i)
{
+
cl_uint numsdev;
cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
if(status != CL_DEVICE_NOT_FOUND)
openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0));
Info ocltmpinfo;
+ openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(plfmName), plfmName, NULL));
+ ocltmpinfo.PlatformName = String(plfmName);
+ ocltmpinfo.impl->platName = String(plfmName);
ocltmpinfo.impl->oclplatform = platforms[i];
+ openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(clVersion), clVersion, NULL));
+ ocltmpinfo.impl->clVersion = clVersion;
for(unsigned j = 0; j < numsdev; ++j)
{
ocltmpinfo.impl->devices.push_back(devices[j]);
}
void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height,
- size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
++ size_t widthInBytes, size_t height,
+ DevMemRW rw_type, DevMemType mem_type, void* hptr)
{ // Creates a cl buffer of widthInBytes * height bytes in clCxt's context, stores it in *dev_ptr and reports the row pitch through *pitch.
cl_int status;
- *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
- widthInBytes * height, 0, &status);
+ if(hptr && (mem_type==DEVICE_MEM_UHP || mem_type==DEVICE_MEM_CHP)) // the host pointer is forwarded only for the UHP/CHP memory types
- *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext,
- gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
++ *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext,
++ gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ widthInBytes * height, hptr, &status);
+ else // every other case passes 0 as the host pointer; NOTE(review): this includes UHP/CHP with a null hptr - confirm the mapped flags allow that
+ *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ widthInBytes * height, 0, &status);
openCLVerifyCall(status);
*pitch = widthInBytes; // the pitch is reported as exactly the requested row width; no padding is added here
}
return impl->double_support == 1;
case CL_UNIFIED_MEM:
return impl->unified_memory == 1;
- return impl->clVersion.find("OpenCL 1.2") != string::npos;
+ case CL_VER_1_2:
++ return impl->clVersion.find("OpenCL 1.2") != String::npos;
default:
return false;
}
}
else
{
- openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
+ // try to use host ptr
+ createEx(wholeSize, m.type(), gDeviceMemRW, gDeviceMemType, m.datastart);
+ if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP)
- openCLMemcpy2D(clCxt, data, step, m.datastart, m.step,
++ openCLMemcpy2D(clCxt, data, step, m.datastart, m.step,
+ wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
}
rows = m.rows;
}
break;
default:
- CV_Error(CV_StsUnsupportedFormat, "unknown depth");
+ CV_Error(Error::StsUnsupportedFormat, "unknown depth");
}
#ifdef CL_VERSION_1_2
- if(dst.offset == 0 && dst.cols == dst.wholecols)
+ //this enables backwards portability to
+ //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
+ if(Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
+ dst.offset == 0 && dst.cols == dst.wholecols)
{
- clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
- clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
++ clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
+ (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
}
else
+ #endif
{
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1, compile_option);
+ localThreads, args, -1, -1, compile_option);
}
- #else
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
- openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1, compile_option);
- #endif
}
-static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
+static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
{
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
- vector<pair<size_t , const void *> > args;
+ std::vector<std::pair<size_t , const void *> > args;
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3];
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
- void cv::ocl::oclMat::createEx(Size size, int type,
-void cv::ocl::oclMat::createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type)
++void cv::ocl::oclMat::createEx(Size size, int type,
+ DevMemRW rw_type, DevMemType mem_type, void* hptr)
{
- createEx(size.height, size.width, type, rw_type, mem_type);
+ createEx(size.height, size.width, type, rw_type, mem_type, hptr); // Size overload: forwards to the (rows, cols) overload with height as rows and width as cols; hptr is passed through unchanged
}
void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
}
- void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
-void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type, DevMemType mem_type)
++void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
+ DevMemRW rw_type, DevMemType mem_type, void* hptr)
{
clCxt = Context::getContext();
/* core logic */
size_t esz = elemSize();
void *dev_ptr;
- openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
- openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
- //openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows);
++ openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
+ rows, rw_type, mem_type, hptr);
if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG;
CV_Error(-1, "Image forma is not supported");
break;
}
- #if CL_VERSION_1_2
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = mat.cols;
- desc.image_height = mat.rows;
- desc.image_depth = 0;
- desc.image_array_size = 1;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.buffer = NULL;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
- #else
- texture = clCreateImage2D(
- (cl_context)mat.clCxt->oclContext(),
- CL_MEM_READ_WRITE,
- &format,
- mat.cols,
- mat.rows,
- 0,
- NULL,
- &err);
+ #ifdef CL_VERSION_1_2
+ //this enables backwards portability to
+ //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
+ if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
+ {
+ cl_image_desc desc;
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ desc.image_width = mat.cols;
+ desc.image_height = mat.rows;
+ desc.image_depth = 0;
+ desc.image_array_size = 1;
+ desc.image_row_pitch = 0;
+ desc.image_slice_pitch = 0;
+ desc.buffer = NULL;
+ desc.num_mip_levels = 0;
+ desc.num_samples = 0;
- texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
++ texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ }
+ else
+ #endif
+ {
+ #ifdef __GNUC__
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ #endif
+ texture = clCreateImage2D(
+ (cl_context)mat.clCxt->oclContext(),
+ CL_MEM_READ_WRITE,
+ &format,
+ mat.cols,
+ mat.rows,
+ 0,
+ NULL,
+ &err);
+ #ifdef __GNUC__
+ #pragma GCC diagnostic pop
#endif
+ }
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.cols, mat.rows, 1 };
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
- clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
+ clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
- }
+ }
else
{
devData = (cl_mem)mat.data;
--- /dev/null
- ///////////////////////////OpenCL kernel strings///////////////////////////
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ // Jia Haipeng, jiahaipeng95@gmail.com
+ // Jin Ma, jin@multicorewareinc.com
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+
+ using namespace cv;
+ using namespace cv::ocl;
+ using namespace std;
+
+ #if !defined (HAVE_OPENCL)
+
+ namespace cv // stub definitions, compiled only when HAVE_OPENCL is not defined
+ {
+ namespace ocl
+ {
+
+ void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int &, int &, int &, int &) // all arguments ignored; the reference outputs are never written
+ {
+ throw_nogpu(); // the only action of every stub in this block
+ }
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) // short constructor stub; no members are initialised
+ {
+ throw_nogpu();
+ }
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, // full constructor stub; no members are initialised
+ float, float, int, int)
+ {
+ throw_nogpu();
+ }
+
+ void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &, const oclMat &, oclMat &) // call-operator stub; inputs and output are untouched
+ {
+ throw_nogpu();
+ }
+ }
+ }
+
+ #else /* !defined (HAVE_OPENCL) */
+
+ namespace cv
+ {
+ namespace ocl
+ {
+
- static string get_kernel_name(string kernel_name, int data_type)
++ ///////////////////////////OpenCL kernel Strings///////////////////////////
+ extern const char *stereocsbp; // defined elsewhere; its address is handed to openCLGetKernelFromSource by every caller in this file
+ }
+
+ }
+ namespace cv
+ {
+ namespace ocl
+ {
+ namespace stereoCSBP
+ {
+ //////////////////////////////////////////////////////////////////////////
+ //////////////////////////////common////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////
+ static inline int divUp(int total, int grain) // integer division rounded up; used here to size NDRange dimensions
+ {
+ return (total + grain - 1) / grain; // equals ceil(total / grain) when both operands are positive
+ }
- stringstream idxStr;
- if(data_type == CV_16S)
- idxStr << "0";
- else
- idxStr << "1";
- kernel_name += idxStr.str();
-
- return kernel_name;
++ static String get_kernel_name(String kernel_name, int data_type) // builds the kernel name for the given message data type
+ {
- string kernelName = get_kernel_name("init_data_cost_", data_type);
++ return kernel_name + (data_type == CV_16S ? "0" : "1"); // suffix "0" for CV_16S, "1" for any other type
+ }
+ using cv::ocl::StereoConstantSpaceBP;
+ //////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////init_data_cost//////////////////////////////////
+ //////////////////////////////////////////////////////////////////////////////////
+ static void init_data_cost_caller(const oclMat &left, const oclMat &right, oclMat &temp, // runs the "init_data_cost_<0|1>" kernel over a w x h grid and waits for it
+ StereoConstantSpaceBP &rthis,
+ int msg_step, int h, int w, int level)
+ {
+ Context *clCxt = left.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+ int channels = left.oclchannels();
+
- string kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
++ String kernelName = get_kernel_name("init_data_cost_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8 ,1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], // w and h rounded up to whole work-groups
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int cdisp_step1 = msg_step * h; // passed to the kernel as argument 10
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&right.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&level));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&channels));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&msg_step));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&rthis.data_weight));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_float), (void *)&rthis.max_data_term));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&cdisp_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked, unlike the other CL calls here
+ openCLSafeCall(clReleaseKernel(kernel)); // the kernel object is created and released on every call
+ }
+
+ static void init_data_cost_reduce_caller(const oclMat &left, const oclMat &right, oclMat &temp, // runs the "init_data_cost_reduce_<0|1>" kernel as a 3-D launch and waits for it
+ StereoConstantSpaceBP &rthis,
+ int msg_step, int h, int w, int level)
+ {
+
+ Context *clCxt = left.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+ int channels = left.oclchannels();
+ int win_size = (int)std::pow(2.f, level); // 2^level, used as the x size of the work-group
+
- string kernelName = get_kernel_name("get_first_k_initial_local_", data_type);
++ String kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ const int threadsNum = 256;
+ //size_t blockSize = threadsNum;
+ size_t localThreads[3] = {win_size, 1, threadsNum / win_size}; // win_size x 1 x (256 / win_size) work-group
+ size_t globalThreads[3] = {w *localThreads[0], // x: one work-group per column; y: h times the number of z-groups needed to cover ndisp
+ h * divUp(rthis.ndisp, localThreads[2]) *localThreads[1], 1 * localThreads[2]
+ };
+
+ int local_mem_size = threadsNum * sizeof(float); // byte size of the local buffer bound to argument 3
+ int cdisp_step1 = msg_step * h; // passed to the kernel as argument 15
+
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&right.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, local_mem_size, (void *)NULL)); // NULL value with a size: local-memory argument
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&level));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&win_size));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&channels));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&rthis.ndisp));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&left.step));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_float), (void *)&rthis.data_weight));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.max_data_term));
+ openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th));
+ openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, // 3-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+
+ static void get_first_initial_local_caller(uchar *data_cost_selected, uchar *disp_selected_pyr, // runs the "get_first_k_initial_local_<0|1>" kernel over a w x h grid and waits for it
+ oclMat &temp, StereoConstantSpaceBP &rthis,
+ int h, int w, int nr_plane, int msg_step)
+ {
+ Context *clCxt = temp.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- string kernelName = get_kernel_name("get_first_k_initial_global_", data_type);
++ String kernelName = get_kernel_name("get_first_k_initial_local_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8 ,1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], // w and h rounded up to whole work-groups
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int disp_step = msg_step * h; // passed to the kernel as argument 7
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected)); // the uchar* values are passed as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr, // same launch as the local variant but with the "get_first_k_initial_global_<0|1>" kernel
+ oclMat &temp, StereoConstantSpaceBP &rthis,
+ int h, int w, int nr_plane, int msg_step)
+ {
+ Context *clCxt = temp.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- string kernelName = get_kernel_name("compute_data_cost_", data_type);
++ String kernelName = get_kernel_name("get_first_k_initial_global_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8, 1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], // w and h rounded up to whole work-groups
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int disp_step = msg_step * h; // passed to the kernel as argument 7
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected)); // the uchar* values are passed as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+
+ static void init_data_cost(const oclMat &left, const oclMat &right, oclMat &temp, StereoConstantSpaceBP &rthis, // two steps: an init-data-cost kernel, then a get-first-initial kernel
+ uchar *disp_selected_pyr, uchar *data_cost_selected,
+ size_t msg_step, int h, int w, int level, int nr_plane) // NOTE(review): msg_step is size_t here but every callee takes int, so it is narrowed implicitly
+ {
+
+ if(level <= 1) // levels 0 and 1 use the plain kernel; higher levels use the reduce kernel
+ init_data_cost_caller(left, right, temp, rthis, msg_step, h, w, level);
+ else
+ init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level);
+
+ if(rthis.use_local_init_data_cost == true) // chooses between the "local" and "global" get-first-initial kernels
+ {
+ get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step);
+ }
+ else
+ {
+ get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w,
+ nr_plane, msg_step);
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////compute_data_cost//////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////////
+ static void compute_data_cost_caller(uchar *disp_selected_pyr, uchar *data_cost, // runs the "compute_data_cost_<0|1>" kernel over a w x h grid and waits for it
+ StereoConstantSpaceBP &rthis, int msg_step1,
+ int msg_step2, const oclMat &left, const oclMat &right, int h,
+ int w, int h2, int level, int nr_plane)
+ {
+ Context *clCxt = left.clCxt;
+ int channels = left.oclchannels();
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- string kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
++ String kernelName = get_kernel_name("compute_data_cost_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8, 1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], // w and h rounded up to whole work-groups
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int disp_step1 = msg_step1 * h; // passed as argument 11
+ int disp_step2 = msg_step2 * h2; // passed as argument 12
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disp_selected_pyr)); // the uchar* values are passed as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&data_cost));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&right.data));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&level));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&channels));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&msg_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&msg_step2));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&disp_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step2));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.data_weight));
+ openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
+ openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step));
+ openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost, // runs the "compute_data_cost_reduce_<0|1>" kernel as a 3-D launch and waits for it
+ StereoConstantSpaceBP &rthis, int msg_step1,
+ int msg_step2, const oclMat &left, const oclMat &right, int h,
+ int w, int h2, int level, int nr_plane)
+ {
+ Context *clCxt = left.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+ int channels = left.oclchannels();
+ int win_size = (int)std::pow(2.f, level); // 2^level, used as the x size of the work-group
+
- string kernelName = get_kernel_name("init_message_", data_type);
++ String kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ const size_t threadsNum = 256;
+ //size_t blockSize = threadsNum;
+ size_t localThreads[3] = {win_size, 1, threadsNum / win_size}; // win_size x 1 x (256 / win_size) work-group
+ size_t globalThreads[3] = {w *localThreads[0], // x: one work-group per column; y: h times the number of z-groups needed to cover nr_plane
+ h * divUp(nr_plane, localThreads[2]) *localThreads[1], 1 * localThreads[2]
+ };
+
+ int disp_step1 = msg_step1 * h; // passed as argument 14
+ int disp_step2 = msg_step2 * h2; // passed as argument 15
+ size_t local_mem_size = threadsNum * sizeof(float); // byte size of the local buffer bound to argument 4
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disp_selected_pyr)); // the uchar* values are passed as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&data_cost));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&right.data));
+ openCLSafeCall(clSetKernelArg(kernel, 4, local_mem_size, (void *)NULL)); // NULL value with a size: local-memory argument
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&level));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&left.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&channels));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&win_size));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&msg_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step2));
+ openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&disp_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&disp_step2));
+ openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_float), (void *)&rthis.data_weight));
+ openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
+ openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step));
+ openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, // 3-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis, // dispatches to the plain or the reduce kernel launcher, forwarding every argument unchanged
+ int msg_step1, int msg_step2, const oclMat &left, const oclMat &right, int h, int w,
+ int h2, int level, int nr_plane)
+ {
+ if(level <= 1) // same level threshold as init_data_cost
+ compute_data_cost_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
+ left, right, h, w, h2, level, nr_plane);
+ else
+ compute_data_cost_reduce_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
+ left, right, h, w, h2, level, nr_plane);
+ }
+ ////////////////////////////////////////////////////////////////////////////////////////////////
+ //////////////////////////////////////init message//////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////////
+ static void init_message(uchar *u_new, uchar *d_new, uchar *l_new, uchar *r_new, // runs the "init_message_<0|1>" kernel over a w x h grid and waits for it
+ uchar *u_cur, uchar *d_cur, uchar *l_cur, uchar *r_cur,
+ uchar *disp_selected_pyr_new, uchar *disp_selected_pyr_cur,
+ uchar *data_cost_selected, uchar *data_cost, oclMat &temp, StereoConstantSpaceBP rthis, // NOTE(review): rthis is taken by value here (a copy), unlike the by-reference callers above; only msg_type is read
+ size_t msg_step1, size_t msg_step2, int h, int w, int nr_plane,
+ int h2, int w2, int nr_plane2)
+ {
+ Context *clCxt = temp.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- string kernelName = get_kernel_name("compute_message_", data_type);
++ String kernelName = get_kernel_name("init_message_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8, 1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0], // w and h rounded up to whole work-groups
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int disp_step1 = msg_step1 * h; // size_t product narrowed to int; passed as argument 19
+ int disp_step2 = msg_step2 * h2; // size_t product narrowed to int; passed as argument 20
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&u_new)); // arguments 0-12 pass pointer values as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_new));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&l_new));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&r_new));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&u_cur));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_cur));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&l_cur));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *)&r_cur));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_mem), (void *)&disp_selected_pyr_new));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *)&disp_selected_pyr_cur));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *)&data_cost_selected));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *)&data_cost));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&h2));
+ openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_int), (void *)&w2));
+ openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&nr_plane2));
+ openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&disp_step1));
+ openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
+ openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1)); // NOTE(review): msg_step1 is size_t but only sizeof(cl_int) bytes are read from its address - correct only where the low-order bytes come first; confirm
+ openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2)); // NOTE(review): same size_t / cl_int mismatch as argument 21
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ ////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////calc_all_iterations////////////////////////////////////////////////
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ static void calc_all_iterations_caller(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected, // runs one pass of the "compute_message_<0|1>" kernel and waits for it
+ uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis, // NOTE(review): rthis is taken by value (copied on every call)
+ int msg_step, int h, int w, int nr_plane, int i)
+ {
+ Context *clCxt = temp.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- string kernelName = get_kernel_name("compute_disp_", data_type);
++ String kernelName = get_kernel_name("compute_message_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ size_t localThreads[] = {32, 8, 1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(w, (localThreads[0]) << 1) *localThreads[0], // x covers only about w / 2 work-items (divUp(w, 64) * 32); presumably each one handles two columns - confirm against the kernel
+ divUp(h, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int disp_step = msg_step * h; // passed as argument 12
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&u)); // arguments 0-5 pass uchar* values as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&l));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&r));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&data_cost_selected));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&disp_selected_pyr));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&temp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&h));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&w));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&i)); // i is the iteration parity supplied by calc_all_iterations
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_float), (void *)&rthis.max_disc_term));
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
+ openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step));
+ openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected, // launches the compute_message pass rthis.iters times
+ uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis, // NOTE(review): rthis is taken by value, and copied again for each inner call
+ int msg_step, int h, int w, int nr_plane)
+ {
+ for(int t = 0; t < rthis.iters; t++)
+ calc_all_iterations_caller(u, d, l, r, data_cost_selected, disp_selected_pyr, temp, rthis,
+ msg_step, h, w, nr_plane, t & 1); // t & 1 alternates 0, 1, 0, 1, ... across iterations
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////
+ //////////////////////////compute_disp////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////////////////////
+ static void compute_disp(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected, // runs the "compute_disp_<0|1>" kernel over disp.cols x disp.rows and waits for it
+ uchar *disp_selected_pyr, StereoConstantSpaceBP &rthis, size_t msg_step,
+ oclMat &disp, int nr_plane)
+ {
+ Context *clCxt = disp.clCxt;
+ int data_type = rthis.msg_type; // selects the "0"/"1" kernel-name suffix
+
- int rows = left.rows;
- int cols = left.cols;
++ String kernelName = get_kernel_name("compute_disp_", data_type);
+
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+
+ //size_t blockSize = 256;
+ size_t localThreads[] = {32, 8, 1}; // 32 x 8 work-group
+ size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) *localThreads[0], // output size rounded up to whole work-groups
+ divUp(disp.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
+
+ int step_size = disp.step / disp.elemSize(); // row stride of disp in elements rather than bytes
+ int disp_step = disp.rows * msg_step; // size_t product narrowed to int; passed as argument 12
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&u)); // arguments 0-5 pass uchar* values as cl_mem-sized arguments
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&l));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&r));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&data_cost_selected));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&disp_selected_pyr));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&disp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&step_size));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&disp.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&disp.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
+ openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step)); // NOTE(review): msg_step is size_t but only sizeof(cl_int) bytes are read from its address - correct only where the low-order bytes come first; confirm
+ openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
+ openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, // 2-D launch
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish(*(cl_command_queue*)getoclCommandQueue()); // NOTE(review): return value is not checked
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ }
+ }
+ }
+ namespace // file-local defaults used by the short StereoConstantSpaceBP constructor
+ {
+ const float DEFAULT_MAX_DATA_TERM = 30.0f; // initial value of max_data_term
+ const float DEFAULT_DATA_WEIGHT = 1.0f; // initial value of data_weight
+ const float DEFAULT_MAX_DISC_TERM = 160.0f; // initial value of max_disc_term
+ const float DEFAULT_DISC_SINGLE_JUMP = 10.0f; // initial value of disc_single_jump
+ }
+
+ void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane) // derives ndisp, iters, levels and nr_plane from the image size; all four are outputs
+ {
+ ndisp = (int) ((float) width / 3.14f); // truncated towards zero
+ if ((ndisp & 1) != 0) // bump odd values so that ndisp is even
+ ndisp++;
+
+ int mm = ::max(width, height); // larger image dimension
+ iters = mm / 100 + ((mm > 1200) ? - 4 : 4); // mm / 100, minus 4 above 1200 pixels and plus 4 otherwise
+
+ levels = (int)::log(static_cast<double>(mm)) * 2 / 3; // NOTE(review): the (int) cast binds to the log() result only, so "* 2 / 3" is integer arithmetic on the truncated logarithm - confirm this is intended
+ if (levels == 0) levels++; // never report zero levels
+
+ nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1)); // ndisp / 2^(levels + 1), truncated
+ }
+
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_, // short constructor: cost terms come from the DEFAULT_* constants
+ int msg_type_)
+
+ : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
+ max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
+ max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0), // min_disp_th starts at 0
+ msg_type(msg_type_), use_local_init_data_cost(true) // the "local" init kernel is selected by default
+ {
+ CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); // only these two message types are accepted
+ }
+
+
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_, // full constructor: every cost term is supplied by the caller
+ float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
+ int min_disp_th_, int msg_type_)
+ : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
+ max_data_term(max_data_term_), data_weight(data_weight_),
+ max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
+ msg_type(msg_type_), use_local_init_data_cost(true) // the "local" init kernel is selected by default
+ {
+ CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); // only these two message types are accepted
+ }
+
+ // Runs one constant-space belief-propagation stereo pass with message element type T.
+ //
+ // Steps, as implemented below:
+ //   1. clamp rthis.levels to int(log2(ndisp)) and describe a `levels`-deep pyramid
+ //      (cols/rows halve per level, nr_plane doubles per level);
+ //   2. (re)allocate and zero the message buffers u/d/l/r, the selected-disparity
+ //      buffers, data_cost, data_cost_selected and temp;
+ //   3. walk the pyramid from level `levels - 1` down to 0, initialising or refining
+ //      the data cost and messages and then running calc_all_iterations;
+ //   4. compute the disparity into `out` (CV_16S) and convert into `disp` when
+ //      `disp` has another type.
+ //
+ // Side effect: rthis.levels is overwritten with the clamped value.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch and
+ // are presumably not part of the resulting function.
+ template<class T>
+ static void csbp_operator(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
+ oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
+ oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp)
+ {
+ // Debug-only sanity checks on the parameters and on left/right agreement.
+ CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
+ && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
+
+ // Always-on checks: at most 8 levels, 8-bit 1- or 3-channel input.
+ CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));
+
+ const Scalar zero = Scalar::all(0);
+
+ ////////////////////////////////////Init///////////////////////////////////////////////////
- int levels = rthis.levels;
++ int rows = left.rows;
++ int cols = left.cols;
+
+ // Never use more levels than int(log2(ndisp)).
+ rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
- nr_plane_pyr[0] = rthis.nr_plane;
++ int levels = rthis.levels;
+
+ // One allocation, carved into four per-level arrays of `levels` ints each.
+ AutoBuffer<int> buf(levels * 4);
+
+ int *cols_pyr = buf;
+ int *rows_pyr = cols_pyr + levels;
+ int *nr_plane_pyr = rows_pyr + levels;
+ int *step_pyr = nr_plane_pyr + levels;
+
+ cols_pyr[0] = cols;
+ rows_pyr[0] = rows;
- step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
++ nr_plane_pyr[0] = rthis.nr_plane;
+
+ // Row step in elements of T, derived from alignSize(bytes, 64).
+ const int n = 64;
- disp_selected_pyr[0] = zero;
++ step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
+ for (int i = 1; i < levels; i++)
+ {
+ cols_pyr[i] = cols_pyr[i - 1] / 2;
+ rows_pyr[i] = rows_pyr[i - 1]/ 2;
+
+ nr_plane_pyr[i] = nr_plane_pyr[i - 1] * 2;
+
+ step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T);
+ }
+
+ // Buffers are sized for level 0; data_cost is twice as tall as a message buffer.
+ Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
+ Size data_cost_size(step_pyr[0], rows * nr_plane_pyr[0] * 2);
+
+ u[0].create(msg_size, DataType<T>::type);
+ d[0].create(msg_size, DataType<T>::type);
+ l[0].create(msg_size, DataType<T>::type);
+ r[0].create(msg_size, DataType<T>::type);
+
+ u[1].create(msg_size, DataType<T>::type);
+ d[1].create(msg_size, DataType<T>::type);
+ l[1].create(msg_size, DataType<T>::type);
+ r[1].create(msg_size, DataType<T>::type);
+
+ disp_selected_pyr[0].create(msg_size, DataType<T>::type);
+ disp_selected_pyr[1].create(msg_size, DataType<T>::type);
+
+ data_cost.create(data_cost_size, DataType<T>::type);
+ data_cost_selected.create(msg_size, DataType<T>::type);
+
+ // temp is at least data_cost_size, and is enlarged when
+ // step * rows_pyr[levels - 1] * ndisp elements would not fit in that.
+ Size temp_size = data_cost_size;
+ if (data_cost_size.width * data_cost_size.height < step_pyr[0] * rows_pyr[levels - 1] * rthis.ndisp)
+ temp_size = Size(step_pyr[0], rows_pyr[levels - 1] * rthis.ndisp);
+
+ temp.create(temp_size, DataType<T>::type);
+ temp = zero;
+
+ ///////////////////////////////// Compute////////////////////////////////////////////////
+
+ //csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
+ // rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
+
+ // Zero both halves of every double-buffered array before the first level runs.
+ l[0] = zero;
+ d[0] = zero;
+ r[0] = zero;
+ u[0] = zero;
++ disp_selected_pyr[0] = zero;
+
+ l[1] = zero;
+ d[1] = zero;
+ r[1] = zero;
+ u[1] = zero;
+ disp_selected_pyr[1] = zero;
+
+ data_cost = zero;
+
+ data_cost_selected = zero;
+
+ // Index (0 or 1) of the buffer set holding the current level's messages.
+ int cur_idx = 0;
+
+ for (int i = levels - 1; i >= 0; i--)
+ {
+ if (i == levels - 1)
+ {
+ // First processed level: build the data cost directly from the images.
+ cv::ocl::stereoCSBP::init_data_cost(left, right, temp, rthis, disp_selected_pyr[cur_idx].data,
+ data_cost_selected.data, step_pyr[0], rows_pyr[i], cols_pyr[i],
+ i, nr_plane_pyr[i]);
+ }
+ else
+ {
+ // Later levels: data cost and messages are derived from level i + 1.
+ cv::ocl::stereoCSBP::compute_data_cost(
+ disp_selected_pyr[cur_idx].data, data_cost.data, rthis, step_pyr[0],
+ step_pyr[0], left, right, rows_pyr[i], cols_pyr[i], rows_pyr[i + 1], i,
+ nr_plane_pyr[i + 1]);
+
+ // Toggle between buffer set 0 and 1.
+ int new_idx = (cur_idx + 1) & 1;
+
+ cv::ocl::stereoCSBP::init_message(u[new_idx].data, d[new_idx].data, l[new_idx].data, r[new_idx].data,
+ u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+ disp_selected_pyr[new_idx].data, disp_selected_pyr[cur_idx].data,
+ data_cost_selected.data, data_cost.data, temp, rthis, step_pyr[0],
+ step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rows_pyr[i + 1],
+ cols_pyr[i + 1], nr_plane_pyr[i + 1]);
+ cur_idx = new_idx;
+ }
+ cv::ocl::stereoCSBP::calc_all_iterations(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+ data_cost_selected.data, disp_selected_pyr[cur_idx].data, temp,
+ rthis, step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i]);
+ }
+
+ if (disp.empty())
+ disp.create(rows, cols, CV_16S);
+
+ // `out` is assigned from `disp` when disp is already CV_16S; otherwise `out` is
+ // (re)created as a CV_16S matrix and converted into `disp` at the end.
+ out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
+ out = zero;
+
+ stereoCSBP::compute_disp(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+ data_cost_selected.data, disp_selected_pyr[cur_idx].data, rthis, step_pyr[0],
+ out, nr_plane_pyr[0]);
+ if (disp.type() != CV_16S)
+ out.convertTo(disp, disp.type());
+ }
+
+
+ // Signature shared by every csbp_operator<T> instantiation.
+ typedef void (*csbp_operator_t)(StereoConstantSpaceBP &rthis,
+                                 oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
+                                 oclMat disp_selected_pyr[2],
+                                 oclMat &data_cost, oclMat &data_cost_selected,
+                                 oclMat &temp, oclMat &out,
+                                 const oclMat &left, const oclMat &right, oclMat &disp);
+
+ // Dispatch table indexed by msg_type; only slots 3 and 5 hold an implementation.
+ const static csbp_operator_t operators[] =
+ {
+     0,                       // 0
+     0,                       // 1
+     0,                       // 2
+     csbp_operator<short>,    // 3
+     0,                       // 4
+     csbp_operator<float>,    // 5
+     0,                       // 6
+     0                        // 7
+ };
+
+ // Computes the disparity for a left/right pair by forwarding to the
+ // csbp_operator instantiation selected by msg_type.
+ void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat &right, oclMat &disp)
+ {
+     // Guards the table lookup: every other slot of `operators` is null.
+     CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
+
+     const csbp_operator_t run = operators[msg_type];
+     run(*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out,
+         left, right, disp);
+ }
+
+ #endif /* !defined (HAVE_OPENCL) */
////////////////////////////////////////////////////////////////////////
static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
{
- Context *clCxt = input.clCxt;
-
- string kernelName = "prefilter_xsobel";
+ String kernelName = "prefilter_xsobel";
- cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
size_t blockSize = 1;
size_t globalThreads[3] = { input.cols, input.rows, 1 };
{
int winsz2 = winSize >> 1;
- Context *clCxt = left.clCxt;
-
- string kernelName = "stereoKernel";
+ String kernelName = "stereoKernel";
- cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
disp.setTo(Scalar_<unsigned char>::all(0));
minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
static void postfilter_textureness(oclMat &left, int winSize,
float avergeTexThreshold, oclMat &disparity)
{
- Context *clCxt = left.clCxt;
-
- string kernelName = "textureness_kernel";
+ String kernelName = "textureness_kernel";
- cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
size_t blockSize = 1;
size_t localThreads[] = { BLOCK_W, blockSize ,1};
--- /dev/null
- void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y,
- oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy,
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ // Jin Ma, jin@multicorewareinc.com
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+
+ #include "precomp.hpp"
+ using namespace std;
+ using namespace cv;
+ using namespace cv::ocl;
+
+ // Declaration of the OpenCL kernel string used by the TV-L1 host code in this
+ // file; the definition lives elsewhere.
+ namespace cv { namespace ocl {
+     ///////////////////////////OpenCL kernel strings///////////////////////////
+     extern const char *tvl1flow;
+ } }
+
+ // Default constructor: loads the default TV-L1 tuning parameters and starts
+ // with useInitialFlow disabled.
+ cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL()
+ {
+     // Loop counts.
+     nscales    = 5;
+     warps      = 5;
+     iterations = 300;
+
+     // Numerical parameters of the solver.
+     tau     = 0.25;
+     lambda  = 0.15;
+     theta   = 0.3;
+     epsilon = 0.01;
+
+     // flowx/flowy are outputs only unless the caller turns this on.
+     useInitialFlow = false;
+ }
+
+ // Computes the TV-L1 optical flow between I0 and I1 over an image pyramid.
+ //
+ // I0, I1       : input frames, same size and type; CV_8UC1 or CV_32FC1.
+ // flowx, flowy : output flow components (CV_32FC1, size of I0). When
+ //                useInitialFlow is set they must already hold an initial flow of
+ //                that size and type, which is down-sampled into the pyramid.
+ //
+ // Side effect: nscales is reduced (and stays reduced) when a pyramid level would
+ // become smaller than 16 pixels in either dimension.
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy)
+ {
+     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
+     CV_Assert( I0.size() == I1.size() );
+     CV_Assert( I0.type() == I1.type() );
+     CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
+     CV_Assert( nscales > 0 );
+
+     // allocate memory for the pyramid structure
+     I0s.resize(nscales);
+     I1s.resize(nscales);
+     u1s.resize(nscales);
+     u2s.resize(nscales);
+
+     // Level 0 holds the inputs as 32-bit float: 8-bit data is converted with
+     // scale 1, any other depth with scale 255.
+     //I0s_step == I1s_step
+     I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
+     I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
+
+     if (!useInitialFlow)
+     {
+         flowx.create(I0.size(), CV_32FC1);
+         flowy.create(I0.size(), CV_32FC1);
+     }
+     // Level 0 of the flow pyramid is assigned from the caller's matrices.
+     //u1s_step != u2s_step
+     u1s[0] = flowx;
+     u2s[0] = flowy;
+
+     // Work buffers are allocated once at full resolution; procOneScale takes
+     // top-left sub-rectangles of them for the smaller levels.
+     I1x_buf.create(I0.size(), CV_32FC1);
+     I1y_buf.create(I0.size(), CV_32FC1);
+
+     I1w_buf.create(I0.size(), CV_32FC1);
+     I1wx_buf.create(I0.size(), CV_32FC1);
+     I1wy_buf.create(I0.size(), CV_32FC1);
+
+     grad_buf.create(I0.size(), CV_32FC1);
+     rho_c_buf.create(I0.size(), CV_32FC1);
+
+     p11_buf.create(I0.size(), CV_32FC1);
+     p12_buf.create(I0.size(), CV_32FC1);
+     p21_buf.create(I0.size(), CV_32FC1);
+     p22_buf.create(I0.size(), CV_32FC1);
+
+     diff_buf.create(I0.size(), CV_32FC1);
+
+     // create the scales
+     for (int s = 1; s < nscales; ++s)
+     {
+         ocl::pyrDown(I0s[s - 1], I0s[s]);
+         ocl::pyrDown(I1s[s - 1], I1s[s]);
+
+         // Stop building levels once one gets smaller than 16 pixels; level s
+         // itself is then not used.
+         if (I0s[s].cols < 16 || I0s[s].rows < 16)
+         {
+             nscales = s;
+             break;
+         }
+
+         if (useInitialFlow)
+         {
+             ocl::pyrDown(u1s[s - 1], u1s[s]);
+             ocl::pyrDown(u2s[s - 1], u2s[s]);
+
+             // Halving the resolution halves the displacement magnitudes.
+             // Each component is scaled in place: the previous code wrote
+             // 0.5 * u1s[s] into u2s[s], replacing the vertical flow with a
+             // scaled copy of the horizontal one.
+             multiply(0.5, u1s[s], u1s[s]);
+             multiply(0.5, u2s[s], u2s[s]);
+         }
+     }
+
+     // pyramidal structure for computing the optical flow
+     for (int s = nscales - 1; s >= 0; --s)
+     {
+         // compute the optical flow at the current scale
+         procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
+
+         // if this was the last scale, finish now
+         if (s == 0)
+             break;
+
+         // otherwise, upsample the optical flow
+
+         // zoom the optical flow for the next finer scale
+         ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
+         ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
+
+         // scale the optical flow with the appropriate zoom factor
+         multiply(2, u1s[s - 1], u1s[s - 1]);
+         multiply(2, u2s[s - 1], u2s[s - 1]);
+     }
+ }
+
+ // Forward declarations of the host-side wrappers that launch the TV-L1 kernels;
+ // the definitions follow further down in this file.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
+ namespace ocl_tvl1flow
+ {
+ // Launches "centeredGradientKernel" on src, writing into dx and dy.
+ void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy);
+
- void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
- oclMat &rho_c, oclMat &p11, oclMat &p12,
- oclMat &p21, oclMat &p22, oclMat &u1,
+ // Launches "warpBackwardKernel"; I1, I1x and I1y are passed as textures.
++ void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y,
++ oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy,
+ oclMat &grad, oclMat &rho);
+
- void estimateDualVariables(oclMat &u1, oclMat &u2,
+ // Launches "estimateUKernel"; procOneScale sums `error` to test convergence.
++ void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
++ oclMat &rho_c, oclMat &p11, oclMat &p12,
++ oclMat &p21, oclMat &p22, oclMat &u1,
+ oclMat &u2, oclMat &error, float l_t, float theta);
+
- estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
+ // Launches "estimateDualVariablesKernel" on u1, u2 and the four p matrices.
++ void estimateDualVariables(oclMat &u1, oclMat &u2,
+ oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
+ }
+
+ // Refines the flow (u1, u2) between I0 and I1 at a single pyramid level.
+ //
+ // If u1 is empty on entry, both u1 and u2 are created at I0's size and zeroed.
+ // The function then runs `warps` outer iterations; each one calls warpBackward
+ // and then alternates estimateU / estimateDualVariables until either the summed
+ // `diff` drops to epsilon^2 * pixel count or `iterations` passes have run.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
+ {
+ using namespace ocl_tvl1flow;
+
+ // Convergence threshold, scaled by the number of pixels at this level.
+ const double scaledEpsilon = epsilon * epsilon * I0.size().area();
+
+ CV_DbgAssert( I1.size() == I0.size() );
+ CV_DbgAssert( I1.type() == I0.type() );
+ CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
+ CV_DbgAssert( u2.size() == u1.size() );
+
+ // No flow supplied for this level: start from zero.
+ if (u1.empty())
+ {
+ u1.create(I0.size(), CV_32FC1);
+ u1.setTo(Scalar::all(0));
+
+ u2.create(I0.size(), CV_32FC1);
+ u2.setTo(Scalar::all(0));
+ }
+
+ // Every work matrix below is the top-left I0.cols x I0.rows sub-rectangle of a
+ // member buffer.
+ oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
+
+ centeredGradient(I1, I1x, I1y);
+
+ oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
+
+ oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
+
+ // The four p matrices are reset to zero on every call.
+ oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
+ oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
+ p11.setTo(Scalar::all(0));
+ p12.setTo(Scalar::all(0));
+ p21.setTo(Scalar::all(0));
+ p22.setTo(Scalar::all(0));
+
+ oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
+
+ // Combined constants handed to the kernels.
+ const float l_t = static_cast<float>(lambda * theta);
+ const float taut = static_cast<float>(tau / theta);
+
+ for (int warpings = 0; warpings < warps; ++warpings)
+ {
+ warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
+
+ // Start above any threshold so the inner loop runs at least once
+ // (provided iterations > 0).
+ double error = numeric_limits<double>::max();
+ for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
+ {
- string kernelName = "centeredGradientKernel";
++ estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
+ u1, u2, diff, l_t, static_cast<float>(theta));
+
+ // Sum of `diff` over the level, used as the convergence measure.
+ error = ocl::sum(diff)[0];
+
+ estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
+
+ }
+ }
+
+ }
+
+ // Clears the four pyramid vectors and calls release() on every work buffer
+ // member of this object.
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
+ {
+     // Scratch buffers.
+     norm_buf.release();
+     diff_buf.release();
+
+     // The four p buffers.
+     p22_buf.release();
+     p21_buf.release();
+     p12_buf.release();
+     p11_buf.release();
+
+     // Gradient / rho buffers.
+     rho_c_buf.release();
+     grad_buf.release();
+
+     // Warped image and its derivatives.
+     I1wy_buf.release();
+     I1wx_buf.release();
+     I1w_buf.release();
+
+     // Derivatives of I1.
+     I1y_buf.release();
+     I1x_buf.release();
+
+     // Image and flow pyramids.
+     u2s.clear();
+     u1s.clear();
+     I1s.clear();
+     I0s.clear();
+ }
+
+ // Launches "centeredGradientKernel" from the tvl1flow program on `src`, with
+ // `dx` and `dy` as outputs. One work-item per pixel of src, work-group 32x8.
+ // NOTE(review): only dx's step is passed to the kernel; this presumably assumes
+ // dy has the same step as dx — confirm against the kernel source.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
+ void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
+ {
+ Context *clCxt = src.clCxt;
+ size_t localThreads[3] = {32, 8, 1};
+ size_t globalThreads[3] = {src.cols, src.rows, 1};
+
+ // Steps are converted from bytes to elements before being passed on.
+ int srcElementSize = src.elemSize();
+ int src_step = src.step/srcElementSize;
+
+ int dElememntSize = dx.elemSize();
+ int dx_step = dx.step/dElememntSize;
+
- size_t globalThread[] =
++ String kernelName = "centeredGradientKernel";
+ // Argument order must match the kernel's parameter list.
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&src_step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step));
+ openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);
+
+ }
+
+ // Launches "estimateDualVariablesKernel" from the tvl1flow program.
+ // Passes u1, u2 and the four p matrices plus `taut`; one work-item per element
+ // of u1, work-group 32x8. u1 and u2 may carry a non-zero offset, which is split
+ // into a row and a column (in elements) for the kernel.
+ // NOTE(review): a single step (p11's) is passed for all four p matrices and no
+ // offsets are passed for them — presumably they share one layout; confirm.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
+ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
+ {
+ Context *clCxt = u1.clCxt;
+
+ size_t localThread[] = {32, 8, 1};
- u1.cols,
++ size_t globalThread[] =
+ {
- string kernelName = "estimateDualVariablesKernel";
++ u1.cols,
+ u1.rows,
+ 1
+ };
+
+ // Steps are converted from bytes to elements.
+ int u1_element_size = u1.elemSize();
+ int u1_step = u1.step/u1_element_size;
+
+ int u2_element_size = u2.elemSize();
+ int u2_step = u2.step/u2_element_size;
+
+ int p11_element_size = p11.elemSize();
+ int p11_step = p11.step/p11_element_size;
+
+ // Byte offset -> (row, column-in-elements).
+ int u1_offset_y = u1.offset/u1.step;
+ int u1_offset_x = u1.offset%u1.step;
+ u1_offset_x = u1_offset_x/u1.elemSize();
+
+ int u2_offset_y = u2.offset/u2.step;
+ int u2_offset_x = u2.offset%u2.step;
+ u2_offset_x = u2_offset_x/u2.elemSize();
+
-void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
- oclMat &rho_c, oclMat &p11, oclMat &p12,
- oclMat &p21, oclMat &p22, oclMat &u1,
++ String kernelName = "estimateDualVariablesKernel";
+ // Argument order must match the kernel's parameter list.
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+ args.push_back( make_pair( sizeof(cl_float), (void*)&taut));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+ openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+ }
+
- size_t globalThread[] =
+ // Launches "estimateUKernel" from the tvl1flow program.
+ // Passes the warped derivatives, grad, rho_c, the four p matrices, u1, u2 and
+ // `error`, plus the scalars l_t and theta; one work-item per element of I1wx,
+ // work-group 32x8. u1 and u2 may carry a non-zero offset, which is split into a
+ // row and a column (in elements) for the kernel.
+ // NOTE(review): I1wx's step is the only step passed for the non-flow matrices —
+ // presumably they all share one layout; confirm against the kernel source.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
++void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
++ oclMat &rho_c, oclMat &p11, oclMat &p12,
++ oclMat &p21, oclMat &p22, oclMat &u1,
+ oclMat &u2, oclMat &error, float l_t, float theta)
+ {
+ Context* clCxt = I1wx.clCxt;
+
+ size_t localThread[] = {32, 8, 1};
- I1wx.cols,
++ size_t globalThread[] =
+ {
- string kernelName = "estimateUKernel";
++ I1wx.cols,
+ I1wx.rows,
+ 1
+ };
+
+ // Steps are converted from bytes to elements.
+ int I1wx_element_size = I1wx.elemSize();
+ int I1wx_step = I1wx.step/I1wx_element_size;
+
+ int u1_element_size = u1.elemSize();
+ int u1_step = u1.step/u1_element_size;
+
+ int u2_element_size = u2.elemSize();
+ int u2_step = u2.step/u2_element_size;
+
+ // Byte offset -> (row, column-in-elements).
+ int u1_offset_y = u1.offset/u1.step;
+ int u1_offset_x = u1.offset%u1.step;
+ u1_offset_x = u1_offset_x/u1.elemSize();
+
+ int u2_offset_y = u2.offset/u2.step;
+ int u2_offset_x = u2.offset%u2.step;
+ u2_offset_x = u2_offset_x/u2.elemSize();
+
-
++ String kernelName = "estimateUKernel";
+ // Argument order must match the kernel's parameter list.
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data));
+ args.push_back( make_pair( sizeof(cl_float), (void*)&l_t));
+ args.push_back( make_pair( sizeof(cl_float), (void*)&theta));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+ openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+ }
+
+ // Launches "warpBackwardKernel" from the tvl1flow program.
+ // I1, I1x and I1y are handed to the kernel as textures obtained from
+ // bindTexture(); I0, u1, u2 and the output matrices I1w, I1wx, I1wy, grad and
+ // rho are passed as plain buffers. One work-item per pixel of I0, work-group
+ // 32x8. Asserts that the context supports image2d.
+ // NOTE(review): the three cl_mem handles returned by bindTexture() are never
+ // released in this function. If bindTexture() creates a new image object on each
+ // call, this leaks three images per invocation — confirm and release them after
+ // the kernel launch.
+ // NOTE(review): lines marked `-` appear to be the removed side of this patch.
+ void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
+ {
+ Context* clCxt = I0.clCxt;
+ const bool isImgSupported = support_image2d(clCxt);
- size_t globalThread[] =
++
+ // There is no buffer-based fallback: image support is mandatory here.
+ CV_Assert(isImgSupported);
+
+ // Steps are converted from bytes to elements.
+ int u1ElementSize = u1.elemSize();
+ int u1Step = u1.step/u1ElementSize;
+
+ int u2ElementSize = u2.elemSize();
+ int u2Step = u2.step/u2ElementSize;
+
+ int I0ElementSize = I0.elemSize();
+ int I0Step = I0.step/I0ElementSize;
+
+ int I1w_element_size = I1w.elemSize();
+ int I1w_step = I1w.step/I1w_element_size;
+
+ // Byte offset -> (row, column-in-elements).
+ int u1_offset_y = u1.offset/u1.step;
+ int u1_offset_x = u1.offset%u1.step;
+ u1_offset_x = u1_offset_x/u1.elemSize();
+
+ int u2_offset_y = u2.offset/u2.step;
+ int u2_offset_x = u2.offset%u2.step;
+ u2_offset_x = u2_offset_x/u2.elemSize();
+
+ size_t localThread[] = {32, 8, 1};
- I0.cols,
++ size_t globalThread[] =
+ {
- string kernelName = "warpBackwardKernel";
++ I0.cols,
+ I0.rows,
+ 1
+ };
+
+ // Texture handles for the three matrices the kernel receives as images.
+ cl_mem I1_tex;
+ cl_mem I1x_tex;
+ cl_mem I1y_tex;
+ I1_tex = bindTexture(I1);
+ I1x_tex = bindTexture(I1x);
+ I1y_tex = bindTexture(I1y);
+
-}
++ String kernelName = "warpBackwardKernel";
+ // Argument order must match the kernel's parameter list.
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+ args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+
+ openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
++}
#include <iterator>
#include <string>
#include <cstdarg>
-#include "cvconfig.h"
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-//#include "opencv2/calib3d/calib3d.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/video/video.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/ocl.hpp"
#include "utility.hpp"
- #include "interpolation.hpp"
-//#include "add_test_info.h"
-#endif
+#include "opencv2/core/private.hpp"
-
+#endif
return ;
}
- //int i = 0;
- //double t = 0;
vector<Rect> faces, oclfaces;
- // const static Scalar colors[] = { CV_RGB(0, 0, 255),
- // CV_RGB(0, 128, 255),
- // CV_RGB(0, 255, 255),
- // CV_RGB(0, 255, 0),
- // CV_RGB(255, 128, 0),
- // CV_RGB(255, 255, 0),
- // CV_RGB(255, 0, 0),
- // CV_RGB(255, 0, 255)
- // } ;
-
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));
- cvtColor( img, gray, CV_BGR2GRAY );
+ cvtColor( img, gray, COLOR_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
-#ifdef HAVE_CVCONFIG_H
-#include "cvconfig.h"
-#endif
-
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ts/gpu_perf.hpp"
-
-#include "opencv2/core/core.hpp"
+#include "opencv2/core.hpp"
#include "opencv2/core/gpumat.hpp"
- #include "opencv2/ts/ts_perf.hpp"
-#include "opencv2/superres/superres.hpp"
++#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+#include "opencv2/superres.hpp"
#include "opencv2/superres/optical_flow.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
CV_EXPORTS void fillGradient(Mat& img, int delta = 5);
CV_EXPORTS void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
+ CV_EXPORTS void printVersionInfo(bool useStdOut = true);
} //namespace cvtest
-// fills c with zeros
-CV_EXPORTS void cvTsZero( CvMat* c, const CvMat* mask=0 );
-
-// copies a to b (whole matrix or only the selected region)
-CV_EXPORTS void cvTsCopy( const CvMat* a, CvMat* b, const CvMat* mask=0 );
-
-// converts one array to another
-CV_EXPORTS void cvTsConvert( const CvMat* src, CvMat* dst );
-
-CV_EXPORTS void cvTsGEMM( const CvMat* a, const CvMat* b, double alpha,
- const CvMat* c, double beta, CvMat* d, int flags );
-
#define CV_TEST_MAIN(resourcesubdir) \
int main(int argc, char **argv) \
{ \
<< "'" << expr2 << "': " << MatPart(m2part, border > 0 ? &loc : 0) << ".\n";
}
-} //namespace cvtest
-
-void cvTsConvert( const CvMat* src, CvMat* dst )
-{
- Mat _src = cvarrToMat(src), _dst = cvarrToMat(dst);
- cvtest::convert(_src, _dst, _dst.depth());
-}
-
-void cvTsZero( CvMat* dst, const CvMat* mask )
-{
- Mat _dst = cvarrToMat(dst), _mask = mask ? cvarrToMat(mask) : Mat();
- cvtest::set(_dst, Scalar::all(0), _mask);
+ // Records the OpenCV version and selected lines of the build information as
+ // test properties, optionally echoing them to std::cout.
+ //
+ // useStdOut : when true, every recorded value is also printed to std::cout.
+ //
+ // For each search key below, the line of cv::getBuildInformation() starting at
+ // the key (up to, not including, the next newline) is recorded. A key that is
+ // missing, or that has no newline after it, is skipped.
+ void printVersionInfo(bool useStdOut)
+ {
+     ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION);
+     if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
+
+     std::string buildInfo( cv::getBuildInformation() );
+
+     // { text searched for in the build information, property name to record }
+     static const char* const entries[][2] =
+     {
+         { "Version control", "Version_control" },
+         { "inner version",   "inner_version"   }
+     };
+     const size_t entryCount = sizeof(entries) / sizeof(entries[0]);
+
+     for (size_t i = 0; i < entryCount; ++i)
+     {
+         // find() with a start position of npos returns npos, so a missing key
+         // makes both positions npos and the entry is skipped.
+         const size_t pos1 = buildInfo.find(entries[i][0]);
+         const size_t pos2 = buildInfo.find("\n", pos1);
+         if(pos1 != std::string::npos && pos2 != std::string::npos)
+         {
+             std::string ver( buildInfo.substr(pos1, pos2-pos1) );
+             ::testing::Test::RecordProperty(entries[i][1], ver);
+             if(useStdOut) std::cout << ver << std::endl;
+         }
+     }
+ }
+
}