Merge remote-tracking branch 'origin/2.4'

author Roman Donchenko <roman.donchenko@itseez.com>

Tue, 21 May 2013 10:35:24 +0000 (14:35 +0400)

committer Roman Donchenko <roman.donchenko@itseez.com>

Tue, 21 May 2013 13:19:23 +0000 (17:19 +0400)
author Roman Donchenko <roman.donchenko@itseez.com>
Tue, 21 May 2013 10:35:24 +0000 (14:35 +0400)
committer Roman Donchenko <roman.donchenko@itseez.com>
Tue, 21 May 2013 13:19:23 +0000 (17:19 +0400)
diff --cc 3rdparty/libjasper/CMakeLists.txt

index 83c0198,97919be..7a70a19
--- 1/3rdparty/libjasper/CMakeLists.txt
--- 2/3rdparty/libjasper/CMakeLists.txt
+++ b/3rdparty/libjasper/CMakeLists.txt
@@@ -23,9 -23,8 +23,9 @@@ if(WIN32 AND NOT MINGW
     add_definitions(-DJAS_WIN_MSVC_BUILD)
   endif(WIN32 AND NOT MINGW)
   
- ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized
-                                    -Wmissing-prototypes -Wmissing-declarations -Wunused -Wshadow
+ ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes
- -                                   -Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
++                                   -Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow
+ +                                   -Wsign-compare -Wstrict-overflow)
   ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
   ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005
   
@@@ -49,4 -48,4 +49,3 @@@ endif(
   if(NOT BUILD_SHARED_LIBS)
     install(TARGETS ${JASPER_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main)
   endif()
--
diff --cc CMakeLists.txt
Simple merge
diff --cc cmake/OpenCVDetectAndroidSDK.cmake
Simple merge
diff --cc cmake/OpenCVDetectPython.cmake
Simple merge
diff --cc modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst

index 4dda966,b90fba7..f2fbfd1
--- 1/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
--- 2/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
+++ b/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst
@@@ -1478,14 -1475,8 +1478,14 @@@ The function reconstructs 3-dimensiona
   
   .. [Hartley99] Hartley, R.I., Theory and Practice of Projective Rectification. IJCV 35 2, pp 115-127 (1999)
   
+ +.. [HartleyZ00] Hartley, R. and Zisserman, A. Multiple View Geometry in Computer Vision, Cambridge University Press, 2000.
+ +
   .. [HH08] Hirschmuller, H. Stereo Processing by Semiglobal Matching and Mutual Information, PAMI(30), No. 2, February 2008, pp. 328-341.
   
- .. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://gregslabaugh.name/publications/euler.pdf
+ +.. [Nister03] Nistér, D. An efficient solution to the five-point relative pose problem, CVPR 2003.
+ +
+ +.. [SteweniusCFS] Stewénius, H., Calibrated Fivepoint solver. http://www.vis.uky.edu/~stewe/FIVEPOINT/
+ +
+ .. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://www.soi.city.ac.uk/~sbbh653/publications/euler.pdf (verified: 2013-04-15)
   
   .. [Zhang2000] Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
diff --cc modules/core/src/matmul.cpp
Simple merge
diff --cc modules/highgui/CMakeLists.txt

index 5955ab6,4c60867..375d754
--- 1/modules/highgui/CMakeLists.txt
--- 2/modules/highgui/CMakeLists.txt
+++ b/modules/highgui/CMakeLists.txt
@@@ -74,9 -68,28 +74,28 @@@ set(highgui_src
       src/window.cpp
       )
   
- -file(GLOB highgui_ext_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
+ +file(GLOB highgui_ext_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
   
- if(HAVE_QT)
+ if(HAVE_QT5)
+   set(CMAKE_AUTOMOC ON)
+   set(CMAKE_INCLUDE_CURRENT_DIR ON)
+ 
+   QT5_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
+   list(APPEND highgui_srcs src/window_QT.cpp src/window_QT.h ${_RCC_OUTFILES})
+ 
+   foreach(dt5_dep Core Gui Widgets Test Concurrent)
+     add_definitions(${Qt5${dt5_dep}_DEFINITIONS})
+     include_directories(${Qt5${dt5_dep}_INCLUDE_DIRS})
+     list(APPEND HIGHGUI_LIBRARIES ${Qt5${dt5_dep}_LIBRARIES})
+   endforeach()
+ 
+   if(HAVE_QT_OPENGL)
+     add_definitions(${Qt5OpenGL_DEFINITIONS})
+     include_directories(${Qt5OpenGL_INCLUDE_DIRS})
+     list(APPEND HIGHGUI_LIBRARIES ${Qt5OpenGL_LIBRARIES})
+   endif()
+ 
+ elseif(HAVE_QT)
     if (HAVE_QT_OPENGL)
       set(QT_USE_QTOPENGL TRUE)
     endif()
diff --cc modules/highgui/include/opencv2/highgui/highgui_c.h
Simple merge
diff --cc modules/highgui/src/grfmt_jpeg.cpp
Simple merge
diff --cc modules/highgui/src/grfmt_png.cpp
Simple merge
diff --cc modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst
Simple merge
diff --cc modules/imgproc/src/floodfill.cpp

index 5436c7b,ada6dcc..40556c4
--- 1/modules/imgproc/src/floodfill.cpp
--- 2/modules/imgproc/src/floodfill.cpp
+++ b/modules/imgproc/src/floodfill.cpp
@@@ -123,13 -89,11 +123,12 @@@ ConnectedComp::ConnectedComp(
   
   template<typename _Tp>
   static void
- -icvFloodFill_CnIR( uchar* pImage, int step, CvSize roi, CvPoint seed,
- -                   _Tp newVal, CvConnectedComp* region, int flags,
- -                   std::vector<CvFFillSegment>* buffer )
+ +floodFill_CnIR( Mat& image, Point seed,
+ +               _Tp newVal, ConnectedComp* region, int flags,
+ +               std::vector<FFillSegment>* buffer )
   {
-     typedef typename DataType<_Tp>::channel_type _CTp;
- -    _Tp* img = (_Tp*)(pImage + step * seed.y);
+ +    _Tp* img = (_Tp*)(image.data + image.step * seed.y);
+ +    Size roi = image.size();
       int i, L, R;
       int area = 0;
       int XMin, XMax, YMin = seed.y, YMax = seed.y;
@@@ -268,30 -232,36 +267,29 @@@ struct DiffC
   };
   
   typedef DiffC1<int> Diff32sC1;
- -typedef DiffC3<cv::Vec3i> Diff32sC3;
+ +typedef DiffC3<Vec3i> Diff32sC3;
   typedef DiffC1<float> Diff32fC1;
- -typedef DiffC3<cv::Vec3f> Diff32fC3;
- -
- -static cv::Vec3i& operator += (cv::Vec3i& a, const cv::Vec3b& b)
- -{
- -    a[0] += b[0];
- -    a[1] += b[1];
- -    a[2] += b[2];
- -    return a;
- -}
+ +typedef DiffC3<Vec3f> Diff32fC3;
   
- -template<typename _Tp, typename _WTp, class Diff>
+ +template<typename _Tp, typename _MTp, typename _WTp, class Diff>
   static void
- -icvFloodFillGrad_CnIR( uchar* pImage, int step, uchar* pMask, int maskStep,
- -                       CvSize /*roi*/, CvPoint seed, _Tp newVal, Diff diff,
- -                       CvConnectedComp* region, int flags,
- -                       std::vector<CvFFillSegment>* buffer )
+ +floodFillGrad_CnIR( Mat& image, Mat& msk,
+ +                   Point seed, _Tp newVal, _MTp newMaskVal,
+ +                   Diff diff, ConnectedComp* region, int flags,
+ +                   std::vector<FFillSegment>* buffer )
   {
-     typedef typename DataType<_Tp>::channel_type _CTp;
+ +    int step = (int)image.step, maskStep = (int)msk.step;
+ +    uchar* pImage = image.data;
       _Tp* img = (_Tp*)(pImage + step*seed.y);
- -    uchar* mask = (pMask += maskStep + 1) + maskStep*seed.y;
+ +    uchar* pMask = msk.data + maskStep + sizeof(_MTp);
+ +    _MTp* mask = (_MTp*)(pMask + maskStep*seed.y);
       int i, L, R;
       int area = 0;
- -    _WTp sum = _WTp((typename cv::DataType<_Tp>::channel_type)0);
       int XMin, XMax, YMin = seed.y, YMax = seed.y;
       int _8_connectivity = (flags & 255) == 8;
- -    int fixedRange = flags & CV_FLOODFILL_FIXED_RANGE;
- -    int fillImage = (flags & CV_FLOODFILL_MASK_ONLY) == 0;
- -    uchar newMaskVal = (uchar)(flags & 0xff00 ? flags >> 8 : 1);
- -    CvFFillSegment* buffer_end = &buffer->front() + buffer->size(), *head = &buffer->front(), *tail = &buffer->front();
+ +    int fixedRange = flags & FLOODFILL_FIXED_RANGE;
+ +    int fillImage = (flags & FLOODFILL_MASK_ONLY) == 0;
+ +    FFillSegment* buffer_end = &buffer->front() + buffer->size(), *head = &buffer->front(), *tail = &buffer->front();
   
       L = R = seed.x;
       if( mask[L] )
@@@ -576,44 -579,44 +574,44 @@@ int cv::floodFill( InputOutputArray _im
       else
           CV_Error( CV_StsUnsupportedFormat, "" );
   
+ +    uchar newMaskVal = (uchar)((flags & ~0xff) == 0 ? 1 : ((flags >> 8) & 255));
+ +
       if( type == CV_8UC1 )
- -        icvFloodFillGrad_CnIR<uchar, int, Diff8uC1>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, nv_buf.b[0],
- -                              Diff8uC1(ld_buf.b[0], ud_buf.b[0]),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<uchar, uchar, int, Diff8uC1>(
+ +                img, mask, seedPoint, nv_buf.b[0], newMaskVal,
+ +                Diff8uC1(ld_buf.b[0], ud_buf.b[0]),
+ +                &comp, flags, &buffer);
       else if( type == CV_8UC3 )
- -        icvFloodFillGrad_CnIR<cv::Vec3b, cv::Vec3i, Diff8uC3>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, cv::Vec3b(nv_buf.b),
- -                              Diff8uC3(ld_buf.b, ud_buf.b),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<Vec3b, uchar, Vec3i, Diff8uC3>(
+ +                img, mask, seedPoint, Vec3b(nv_buf.b), newMaskVal,
+ +                Diff8uC3(ld_buf.b, ud_buf.b),
+ +                &comp, flags, &buffer);
       else if( type == CV_32SC1 )
- -        icvFloodFillGrad_CnIR<int, int, Diff32sC1>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, nv_buf.i[0],
- -                              Diff32sC1(ld_buf.i[0], ud_buf.i[0]),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<int, uchar, int, Diff32sC1>(
+ +                img, mask, seedPoint, nv_buf.i[0], newMaskVal,
+ +                Diff32sC1(ld_buf.i[0], ud_buf.i[0]),
+ +                &comp, flags, &buffer);
       else if( type == CV_32SC3 )
- -        icvFloodFillGrad_CnIR<cv::Vec3i, cv::Vec3i, Diff32sC3>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, cv::Vec3i(nv_buf.i),
- -                              Diff32sC3(ld_buf.i, ud_buf.i),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<Vec3i, uchar, Vec3i, Diff32sC3>(
+ +                img, mask, seedPoint, Vec3i(nv_buf.i), newMaskVal,
+ +                Diff32sC3(ld_buf.i, ud_buf.i),
+ +                &comp, flags, &buffer);
       else if( type == CV_32FC1 )
- -        icvFloodFillGrad_CnIR<float, float, Diff32fC1>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, nv_buf.f[0],
- -                              Diff32fC1(ld_buf.f[0], ud_buf.f[0]),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<float, uchar, float, Diff32fC1>(
+ +                img, mask, seedPoint, nv_buf.f[0], newMaskVal,
+ +                Diff32fC1(ld_buf.f[0], ud_buf.f[0]),
+ +                &comp, flags, &buffer);
       else if( type == CV_32FC3 )
- -        icvFloodFillGrad_CnIR<cv::Vec3f, cv::Vec3f, Diff32fC3>(
- -                              img->data.ptr, img->step, mask->data.ptr, mask->step,
- -                              size, seed_point, cv::Vec3f(nv_buf.f),
- -                              Diff32fC3(ld_buf.f, ud_buf.f),
- -                              comp, flags, &buffer);
+ +        floodFillGrad_CnIR<Vec3f, uchar, Vec3f, Diff32fC3>(
+ +                img, mask, seedPoint, Vec3f(nv_buf.f), newMaskVal,
+ +                Diff32fC3(ld_buf.f, ud_buf.f),
+ +                &comp, flags, &buffer);
       else
           CV_Error(CV_StsUnsupportedFormat, "");
-     
++
+ +    if( rect )
+ +        *rect = comp.rect;
+ +    return comp.area;
   }
   
   
diff --cc modules/imgproc/src/imgwarp.cpp
Simple merge
diff --cc modules/objdetect/src/cascadedetect.cpp
Simple merge
diff --cc modules/ocl/include/opencv2/ocl.hpp

index 745953d,0000000..0df96db

mode 100644,000000..100644
--- 1/modules/ocl/include/opencv2/ocl.hpp
--- /dev/null
+++ b/modules/ocl/include/opencv2/ocl.hpp
@@@ -1,1750 -1,0 +1,1690 @@@
-             enum {CL_DOUBLE, CL_UNIFIED_MEM};
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                           License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_OCL_HPP__
+ +#define __OPENCV_OCL_HPP__
+ +
+ +#include <memory>
+ +#include <vector>
+ +
+ +#include "opencv2/core.hpp"
+ +#include "opencv2/imgproc.hpp"
+ +#include "opencv2/objdetect.hpp"
+ +
+ +namespace cv
+ +{
+ +    namespace ocl
+ +    {
+ +        enum
+ +        {
+ +            CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
+ +            CVCL_DEVICE_TYPE_CPU         = (1 << 1),
+ +            CVCL_DEVICE_TYPE_GPU         = (1 << 2),
+ +            CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ +            //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
+ +            CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
+ +        };
+ +
+ +        enum DevMemRW
+ +        {
+ +            DEVICE_MEM_R_W = 0,
+ +            DEVICE_MEM_R_ONLY,
+ +            DEVICE_MEM_W_ONLY
+ +        };
+ +
+ +        enum DevMemType
+ +        {
+ +            DEVICE_MEM_DEFAULT = 0,
+ +            DEVICE_MEM_AHP,         //alloc host pointer
+ +            DEVICE_MEM_UHP,         //use host pointer
+ +            DEVICE_MEM_CHP,         //copy host pointer
+ +            DEVICE_MEM_PM           //persistent memory
+ +        };
+ +
+ +        //Get the global device memory and read/write type
+ +        //return 1 if unified memory system supported, otherwise return 0
+ +        CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
+ +
+ +        //Set the global device memory and read/write type,
+ +        //the newly generated oclMat will all use this type
+ +        //return -1 if the target type is unsupported, otherwise return 0
+ +        CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
+ +
+ +        //this class contains ocl runtime information
+ +        class CV_EXPORTS Info
+ +        {
+ +        public:
+ +            struct Impl;
+ +            Impl *impl;
+ +
+ +            Info();
+ +            Info(const Info &m);
+ +            ~Info();
+ +            void release();
+ +            Info &operator = (const Info &m);
+ +            std::vector<String> DeviceName;
+ +            String PlatformName;
+ +        };
+ +        //////////////////////////////// Initialization & Info ////////////////////////
+ +        //this function may be obsoleted
+ +        //CV_EXPORTS cl_device_id getDevice();
+ +        //the function must be called before any other cv::ocl::functions, it initialize ocl runtime
+ +        //each Info relates to an OpenCL platform
+ +        //there is one or more devices in each platform, each one has a separate name
+ +        CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
+ +
+ +        //set device you want to use, optional function after getDevice be called
+ +        //the devnum is the index of the selected device in DeviceName vector of Info
+ +        CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
+ +
+ +        //optional function, if you want save opencl binary kernel to the file, set its path
+ +        CV_EXPORTS  void setBinpath(const char *path);
+ +
+ +        //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+ +        CV_EXPORTS void* getoclContext();
+ +
+ +        CV_EXPORTS void* getoclCommandQueue();
+ +
+ +        //explicit call clFinish. The global command queue will be used.
+ +        CV_EXPORTS void finish();
+ +
+ +        //this function enable ocl module to use customized cl_context and cl_command_queue
+ +        //getDevice also need to be called before this function
+ +        CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
+ +
+ +        //////////////////////////////// OpenCL context ////////////////////////
+ +        //This is a global singleton class used to represent a OpenCL context.
+ +        class CV_EXPORTS Context
+ +        {
+ +        protected:
+ +            Context();
+ +            friend class std::auto_ptr<Context>;
+ +
+ +        private:
+ +            static std::auto_ptr<Context> clCxt;
+ +            static int val;
+ +        public:
+ +            ~Context();
+ +            void release();
+ +            Info::Impl* impl;
+ +
+ +            static Context *getContext();
+ +            static void setContext(Info &oclinfo);
+ +
-             void createEx(int rows, int cols, int type, 
++            enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
+ +            bool supportsFeature(int ftype);
+ +            size_t computeUnits();
+ +            size_t maxWorkGroupSize();
+ +            void* oclContext();
+ +            void* oclCommandQueue();
+ +        };
+ +
+ +        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ +        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ +                                                        const char **source, String kernelName,
+ +                                                        size_t globalThreads[3], size_t localThreads[3],
+ +                                                        std::vector< std::pair<size_t, const void *> > &args,
+ +                                                        int channels, int depth, const char *build_options,
+ +                                                        bool finish = true, bool measureKernelTime = false,
+ +                                                        bool cleanUp = true);
+ +
+ +        //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ +        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ +                                                        const char **fileName, const int numFiles, String kernelName,
+ +                                                        size_t globalThreads[3], size_t localThreads[3],
+ +                                                        std::vector< std::pair<size_t, const void *> > &args,
+ +                                                        int channels, int depth, const char *build_options,
+ +                                                        bool finish = true, bool measureKernelTime = false,
+ +                                                        bool cleanUp = true);
+ +
+ +        class CV_EXPORTS oclMatExpr;
+ +        //////////////////////////////// oclMat ////////////////////////////////
+ +        class CV_EXPORTS oclMat
+ +        {
+ +        public:
+ +            //! default constructor
+ +            oclMat();
+ +            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ +            oclMat(int rows, int cols, int type);
+ +            oclMat(Size size, int type);
+ +            //! constructs oclMatrix and fills it with the specified value _s.
+ +            oclMat(int rows, int cols, int type, const Scalar &s);
+ +            oclMat(Size size, int type, const Scalar &s);
+ +            //! copy constructor
+ +            oclMat(const oclMat &m);
+ +
+ +            //! constructor for oclMatrix headers pointing to user-allocated data
+ +            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +
+ +            //! creates a matrix header for a part of the bigger matrix
+ +            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ +            oclMat(const oclMat &m, const Rect &roi);
+ +
+ +            //! builds oclMat from Mat. Performs blocking upload to device.
+ +            explicit oclMat (const Mat &m);
+ +
+ +            //! destructor - calls release()
+ +            ~oclMat();
+ +
+ +            //! assignment operators
+ +            oclMat &operator = (const oclMat &m);
+ +            //! assignment operator. Performs blocking upload to device.
+ +            oclMat &operator = (const Mat &m);
+ +            oclMat &operator = (const oclMatExpr& expr);
+ +
+ +            //! performs blocking upload of data to oclMat.
+ +            void upload(const cv::Mat &m);
+ +
+ +
+ +            //! downloads data from device to host memory. Blocking calls.
+ +            operator Mat() const;
+ +            void download(cv::Mat &m) const;
+ +
+ +
+ +            //! returns a new oclMatrix header for the specified row
+ +            oclMat row(int y) const;
+ +            //! returns a new oclMatrix header for the specified column
+ +            oclMat col(int x) const;
+ +            //! ... for the specified row span
+ +            oclMat rowRange(int startrow, int endrow) const;
+ +            oclMat rowRange(const Range &r) const;
+ +            //! ... for the specified column span
+ +            oclMat colRange(int startcol, int endcol) const;
+ +            oclMat colRange(const Range &r) const;
+ +
+ +            //! returns deep copy of the oclMatrix, i.e. the data is copied
+ +            oclMat clone() const;
+ +            //! copies the oclMatrix content to "m".
+ +            // It calls m.create(this->size(), this->type()).
+ +            // It supports any data type
+ +            void copyTo( oclMat &m ) const;
+ +            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            void copyTo( oclMat &m, const oclMat &mask ) const;
+ +            //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+ +
+ +            void assignTo( oclMat &m, int type = -1 ) const;
+ +
+ +            //! sets every oclMatrix element to s
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            oclMat& operator = (const Scalar &s);
+ +            //! sets some of the oclMatrix elements to s, according to the mask
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ +            //! creates alternative oclMatrix header for the same data, with different
+ +            // number of channels and/or different number of rows. see cvReshape.
+ +            oclMat reshape(int cn, int rows = 0) const;
+ +
+ +            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ +            // previous data is unreferenced if needed.
+ +            void create(int rows, int cols, int type);
+ +            void create(Size size, int type);
+ +
+ +            //! allocates new oclMatrix with specified device memory type.
-             void createEx(Size size, int type, DevMemRW rw_type, 
++            void createEx(int rows, int cols, int type,
+ +                          DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
- 
++            void createEx(Size size, int type, DevMemRW rw_type,
+ +                          DevMemType mem_type, void* hptr = 0);
+ +
+ +            //! decreases reference counter;
+ +            // deallocate the data when reference counter reaches 0.
+ +            void release();
+ +
+ +            //! swaps with other smart pointer
+ +            void swap(oclMat &mat);
+ +
+ +            //! locates oclMatrix header within a parent oclMatrix. See below
+ +            void locateROI( Size &wholeSize, Point &ofs ) const;
+ +            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ +            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ +            //! extracts a rectangular sub-oclMatrix
+ +            // (this is a generalized form of row, rowRange etc.)
+ +            oclMat operator()( Range rowRange, Range colRange ) const;
+ +            oclMat operator()( const Rect &roi ) const;
+ +
+ +            oclMat& operator+=( const oclMat& m );
+ +            oclMat& operator-=( const oclMat& m );
+ +            oclMat& operator*=( const oclMat& m );
+ +            oclMat& operator/=( const oclMat& m );
+ +
+ +            //! returns true if the oclMatrix data is continuous
+ +            // (i.e. when there are no gaps between successive rows).
+ +            // similar to CV_IS_oclMat_CONT(cvoclMat->type)
+ +            bool isContinuous() const;
+ +            //! returns element size in bytes,
+ +            // similar to CV_ELEM_SIZE(cvMat->type)
+ +            size_t elemSize() const;
+ +            //! returns the size of element channel in bytes.
+ +            size_t elemSize1() const;
+ +            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ +            int type() const;
+ +            //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+ +            //! 3 channels element actually use 4 channel space
+ +            int ocltype() const;
+ +            //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
+ +            int depth() const;
+ +            //! returns element type, similar to CV_MAT_CN(cvMat->type)
+ +            int channels() const;
+ +            //! returns element type, return 4 for 3 channels element,
+ +            //!because a 3-channel element actually uses 4-channel space
+ +            int oclchannels() const;
+ +            //! returns step/elemSize1()
+ +            size_t step1() const;
+ +            //! returns oclMatrix size:
+ +            // width == number of columns, height == number of rows
+ +            Size size() const;
+ +            //! returns true if oclMatrix data is NULL
+ +            bool empty() const;
+ +
+ +            //! returns pointer to y-th row
+ +            uchar* ptr(int y = 0);
+ +            const uchar *ptr(int y = 0) const;
+ +
+ +            //! template version of the above method
+ +            template<typename _Tp> _Tp *ptr(int y = 0);
+ +            template<typename _Tp> const _Tp *ptr(int y = 0) const;
+ +
+ +            //! matrix transposition
+ +            oclMat t() const;
+ +
+ +            /*! includes several bit-fields:
+ +              - the magic signature
+ +              - continuity flag
+ +              - depth
+ +              - number of channels
+ +              */
+ +            int flags;
+ +            //! the number of rows and columns
+ +            int rows, cols;
+ +            //! a distance between successive rows in bytes; includes the gap if any
+ +            size_t step;
+ +            //! pointer to the data(OCL memory object)
+ +            uchar *data;
+ +
+ +            //! pointer to the reference counter;
+ +            // when oclMatrix points to user-allocated data, the pointer is NULL
+ +            int *refcount;
+ +
+ +            //! helper fields used in locateROI and adjustROI
+ +            //datastart and dataend are not used in current version
+ +            uchar *datastart;
+ +            uchar *dataend;
+ +
+ +            //! OpenCL context associated with the oclMat object.
+ +            Context *clCxt;
+ +            //add offset for handle ROI, calculated in byte
+ +            int offset;
+ +            //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
+ +            int wholerows;
+ +            int wholecols;
+ +        };
+ +
+ +
+ +        ///////////////////// mat split and merge /////////////////////////////////
+ +        //! Compose a multi-channel array from several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ +        CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+ +
+ +        //! Divides multi-channel array into several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ +        CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+ +
+ +        ////////////////////////////// Arithmetics ///////////////////////////////////
+ +        //#if defined DOUBLE_SUPPORT
+ +        //typedef double F;
+ +        //#else
+ +        //typedef float F;
+ +        //#endif
+ +        //    CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
+ +        CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
+ +        //! adds one matrix to another (c = a + b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! adds one matrix to another (c = a + b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ +        //! adds scalar to a matrix (c = a + s)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ +        //! subtracts one matrix from another (c = a - b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! subtracts one matrix from another (c = a - b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ +        //! subtracts scalar from a matrix (c = a - s)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ +        //! subtracts a matrix from a scalar (c = s - a)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
+ +        //! computes element-wise product of the two arrays (c = a * b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
++        //! multiplies matrix to a number (dst = scalar * src)
++        // supports CV_32FC1 only
++        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
+ +        //! computes element-wise quotient of the two arrays (c = a / b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ +        //! computes element-wise quotient of a scalar and an array (c = scale / b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
+ +
+ +        //! compares elements of two arrays (c = a <cmpop> b)
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
+ +
+ +        //! transposes the matrix
+ +        // supports  CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda)
+ +        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes element-wise absolute difference of two arrays (c = abs(a - b))
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+ +        // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
+ +
+ +        //! computes mean value and standard deviation of all or selected array elements
+ +        // supports all types except CV_32F and CV_64F
+ +        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+ +
+ +        //! computes norm of array
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports only CV_8UC1 type
+ +        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+ +
+ +        //! computes norm of the difference between two arrays
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports only CV_8UC1 type
+ +        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+ +
+ +        //! reverses the order of the rows, columns or both in a matrix
+ +        // supports all types
+ +        CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
+ +
+ +        //! computes sum of array elements
+ +        // disabled until fix crash
+ +        // support all types
+ +        CV_EXPORTS Scalar sum(const oclMat &m);
+ +        CV_EXPORTS Scalar absSum(const oclMat &m);
+ +        CV_EXPORTS Scalar sqrSum(const oclMat &m);
+ +
+ +        //! finds global minimum and maximum array elements and returns their values
+ +        // support all C1 types
+ +
+ +        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ +
+ +        //! finds global minimum and maximum array elements and returns their values with locations
+ +        // support all C1 types
+ +
+ +        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ +                                  const oclMat &mask = oclMat());
+ +
+ +        //! counts non-zero array elements
+ +        // support all types
+ +        CV_EXPORTS int countNonZero(const oclMat &src);
+ +
+ +        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+ +        // destination array will have the depth type as lut and the same channels number as source
+ +        //It supports 8UC1 8UC4 only
+ +        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
+ +
+ +        //! only 8UC1 and 256 bins is supported now
+ +        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ +        //! only 8UC1 and 256 bins is supported now
+ +        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+ +        //! bilateralFilter
+ +        // supports 8UC1 8UC4
+ +        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+ +        //! computes exponent of each matrix element (b = e**a)
+ +        // supports only CV_32FC1 type
+ +        CV_EXPORTS void exp(const oclMat &a, oclMat &b);
+ +
+ +        //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+ +        // supports only CV_32FC1 type
+ +        CV_EXPORTS void log(const oclMat &a, oclMat &b);
+ +
+ +        //! computes magnitude of each (x(i), y(i)) vector
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ +        CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ +
+ +        CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
+ +
+ +        //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! the function raises every element of the input array to p
+ +        //! support only CV_32F CV_64F type
+ +        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+ +
+ +        //! converts Cartesian coordinates to polar
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! converts polar coordinates to Cartesian
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
+ +
+ +        //! performs per-element bit-wise inversion
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+ +        //! calculates per-element bit-wise disjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! calculates per-element bit-wise conjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! calculates per-element bit-wise "exclusive or" operation
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! Logical operators
+ +        CV_EXPORTS oclMat operator ~ (const oclMat &);
+ +        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+ +
+ +
+ +        //! Mathematics operators
+ +        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+ +
+ +        struct CV_EXPORTS ConvolveBuf // working state for the convolve() overload that takes a ConvolveBuf&
+ +        {
+ +            Size result_size;
+ +            Size block_size;
+ +            Size user_block_size; // NOTE(review): presumably a caller-supplied block size hint; confirm in the implementation
+ +            Size dft_size;
+ +
+ +            oclMat image_spect, templ_spect, result_spect; // NOTE(review): "spect" presumably means spectrum (frequency-domain) data; confirm
+ +            oclMat image_block, templ_block, result_data;
+ +
+ +            void create(Size image_size, Size templ_size); // declared only; the definition is not in this part of the header
+ +            static Size estimateBlockSize(Size result_size, Size templ_size); // static: callable without a ConvolveBuf instance
+ +        };
+ +
+ +        //! computes convolution of two images, may use discrete Fourier transform
+ +        //! support only CV_32FC1 type
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
+ +
+ +        //! Performs a per-element multiplication of two Fourier spectrums.
+ +        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
+ +        //! support only CV_32FC2 type
+ +        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
+ +
+ +        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
+ +
+ +        //////////////////////////////// Filter Engine ////////////////////////////////
+ +
+ +        /*!
+ +          The Base Class for 1D or Row-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process 1D data.
+ +          In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseRowFilter_GPU
+ +        {
+ +        public:
+ +            BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseRowFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Column-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ +          Such filters are used for the "vertical" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseColumnFilter_GPU
+ +        {
+ +        public:
+ +            BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseColumnFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Non-Separable 2D Filters.
+ +
+ +          This is the base class for linear or non-linear 2D filters.
+ +          */
+ +        class CV_EXPORTS BaseFilter_GPU
+ +        {
+ +        public:
+ +            BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ +                : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ +            virtual ~BaseFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            Size ksize;
+ +            Point anchor;
+ +            int borderType;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Filter Engine.
+ +
+ +          The class can be used to apply an arbitrary filtering operation to an image.
+ +          It contains all the necessary intermediate buffers.
+ +          */
+ +        class CV_EXPORTS FilterEngine_GPU
+ +        {
+ +        public:
+ +            virtual ~FilterEngine_GPU() {} // virtual, so an engine can be destroyed through a FilterEngine_GPU pointer
+ +
+ +            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; // NOTE(review): the default roi (0, 0, -1, -1) presumably means "whole image"; confirm in the implementations
+ +        };
+ +
+ +        //! returns the non-separable filter engine with the specified filter
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+ +
+ +        //! returns the primitive row filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns the primitive column filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+ +
+ +        //! returns the separable linear filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
+ +                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns the separable filter engine with the specified filters
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ +                const Ptr<BaseColumnFilter_GPU> &columnFilter);
+ +
+ +        //! returns the Gaussian filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns filter engine for the generalized Sobel operator
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
+ +
+ +        //! applies Laplacian operator to the image
+ +        // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
+ +        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
+ +
+ +        //! returns 2D box filter
+ +        // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ +                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns box filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D filter with the specified kernel
+ +        // supports CV_8UC1 and CV_8UC4 types
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ +                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns the non-separable linear filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using the normalized box filter
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
+ +        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ +                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D morphological filter
+ +        //! only MORPH_ERODE and MORPH_DILATE are supported
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        // kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ +                Point anchor = Point(-1, -1));
+ +
+ +        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int iterations = 1);
+ +
+ +        //! a synonym for normalized box filter
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ +                                int borderType = BORDER_CONSTANT)
+ +        {
+ +            boxFilter(src, dst, -1, ksize, anchor, borderType);
+ +        }
+ +
+ +        //! applies non-separable 2D linear filter to the image
+ +        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
+ +                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! applies separable 2D linear filter to the image
+ +        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ +                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies generalized Sobel operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies the vertical or horizontal Scharr operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using Gaussian filter.
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! erodes the image (applies the local minimum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! dilates the image (applies the local maximum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! applies an advanced morphological operation to the image
+ +        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        ////////////////////////////// Image processing //////////////////////////////
+ +        //! Does mean shift filtering on GPU.
+ +        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ +                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift procedure on GPU.
+ +        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ +                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift segmentation with elimination of small regions.
+ +        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ +                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! applies fixed threshold to the image.
+ +        // supports CV_8UC1 and CV_32FC1 data type
+ +        // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
+ +        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+ +
+ +        //! resizes the image
+ +        // Supports INTER_NEAREST, INTER_LINEAR
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+ +
+ +        //! Applies a generic geometrical transformation to an image.
+ +
+ +        // Supports INTER_NEAREST, INTER_LINEAR.
+ +
+ +        // Map1 supports CV_16SC2, CV_32FC2  types.
+ +
+ +        // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+ +
+ +        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+ +
+ +        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+ +        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
+ +        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+ +
+ +        //! Smoothes image using median filter
+ +        // src is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
+ +        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+ +
+ +        //! warps the image using affine transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! warps the image using perspective transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! computes the integral image and integral for the squared image
+ +        // sum will have CV_32S type, sqsum - CV_32F type
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+ +        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ +
+ +        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +
+ +#if 0 // compiled out: everything up to the matching #endif is excluded from the build
+ +        class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
+ +        {
+ +        public:
+ +            OclCascadeClassifier() {};
+ +            ~OclCascadeClassifier() {};
+ +
+ +            CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
+ +                                        int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ +        };
+ +#endif
+ +
++        class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
++        {
++        public:
++            OclCascadeClassifierBuf() :
++                m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
+ +
++            ~OclCascadeClassifierBuf() {}
++
++            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
++                                  double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
++                                  Size minSize = Size(), Size maxSize = Size());
++            void release();
++
++        private:
++            void Init(const int rows, const int cols, double scaleFactor, int flags,
++                      const int outputsz, const size_t localThreads[],
++                      Size minSize, Size maxSize);
++            void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
++            void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
++                                         const double scaleFactor, const size_t localThreads[],
++                                         Size minSize, Size maxSize);
++            void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
++
++            int m_rows;
++            int m_cols;
++            int m_flags;
++            int m_loopcount;
++            int m_nodenum;
++            bool findBiggestObject;
++            bool initialized;
++            double m_scaleFactor;
++            Size m_minSize;
++            Size m_maxSize;
++            std::vector<Size> sizev;
++            std::vector<float> scalev;
++            oclMat gimg1, gsum, gsqsum;
++            void * buffers;
++        };
+ +
+ +        /////////////////////////////// Pyramid /////////////////////////////////////
+ +        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+ +
+ +        //! upsamples the source image and then smoothes it
+ +        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+ +
+ +        //! performs linear blending of two images
+ +        //! to avoid accuracy errors sum of weights shouldn't be very close to zero
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+ +
+ +        //! computes vertical sum, supports only CV_32FC1 images
+ +        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+ +
+ +        ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ +        struct CV_EXPORTS MatchTemplateBuf // working state for the matchTemplate() overload that takes a MatchTemplateBuf&
+ +        {
+ +            Size user_block_size;
+ +            oclMat imagef, templf; // NOTE(review): presumably float-converted copies of the image and template; confirm
+ +            std::vector<oclMat> images;
+ +            std::vector<oclMat> image_sums;
+ +            std::vector<oclMat> image_sqsums;
+ +        };
+ +
- 
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+ +
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+ +
+ +
+ +
+ +        ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+ +        struct CV_EXPORTS CannyBuf;
+ +
+ +        //! compute edges of the input image using Canny operator
+ +        // Support CV_8UC1 only
+ +        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +
+ +        struct CV_EXPORTS CannyBuf // buffer set taken by the Canny() overloads that have a CannyBuf& parameter
+ +        {
+ +            CannyBuf() : counter(NULL) {} // counter starts as NULL; create() is not called
+ +            ~CannyBuf()
+ +            {
+ +                release(); // the destructor always calls release()
+ +            }
+ +            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
+ +            {
+ +                create(image_size, apperture_size); // set up immediately via create()
+ +            }
+ +            CannyBuf(const oclMat &dx_, const oclMat &dy_);
+ +            void create(const Size &image_size, int apperture_size = 3);
+ +            void release();
+ +
+ +            oclMat dx, dy;
+ +            oclMat dx_buf, dy_buf;
+ +            oclMat magBuf, mapBuf;
+ +            oclMat trackBuf1, trackBuf2;
+ +            void *counter; // NOTE(review): raw pointer plus a releasing destructor, but no copy constructor/assignment is declared; copying a CannyBuf may release twice -- confirm release() semantics
+ +            Ptr<FilterEngine_GPU> filterDX, filterDY;
+ +        };
+ +
+ +        ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ +        //! HoughCircles
+ +        struct HoughCirclesBuf // buffer set for the HoughCircles() overload that takes a HoughCirclesBuf&; NOTE(review): unlike the other *Buf structs in this header it is not marked CV_EXPORTS -- confirm this is intentional
+ +        {
+ +            oclMat edges;
+ +            oclMat accum;
+ +            oclMat srcPoints;
+ +            oclMat centers;
+ +            CannyBuf cannyBuf; // embedded by value
+ +        };
+ +
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
+ +
+ +
+ +        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ +        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ +        //! Param dft_size is the size of DFT transform.
+ +        //!
+ +        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ +        // support src type of CV32FC1, CV32FC2
+ +        // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ +        // dft_size is the size of original input, which is used for transformation from complex to real.
+ +        // dft_size must be powers of 2, 3 and 5
+ +        // real to complex dft requires at least v1.8 clAmdFft
+ +        // real to complex dft output is not the same with cpu version
+ +        // real to complex and complex to real does not support DFT_ROWS
+ +        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
+ +
+ +        //! implements generalized matrix product algorithm GEMM from BLAS
+ +        // The functionality requires clAmdBlas library
+ +        // only support type CV_32FC1
+ +        // flag GEMM_3_T is not supported
+ +        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ +                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
+ +
+ +        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+ +
+ +        struct CV_EXPORTS HOGDescriptor
+ +
+ +        {
+ +
+ +            enum { DEFAULT_WIN_SIGMA = -1 };
+ +
+ +            enum { DEFAULT_NLEVELS = 64 };
+ +
+ +            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+ +
+ +
+ +
+ +            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+ +
+ +                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+ +
+ +                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+ +
+ +                          double threshold_L2hys = 0.2, bool gamma_correction = true,
+ +
+ +                          int nlevels = DEFAULT_NLEVELS);
+ +
+ +
+ +
+ +            size_t getDescriptorSize() const;
+ +
+ +            size_t getBlockHistogramSize() const;
+ +
+ +
+ +
+ +            void setSVMDetector(const std::vector<float> &detector);
+ +
+ +
+ +
+ +            static std::vector<float> getDefaultPeopleDetector();
+ +
+ +            static std::vector<float> getPeopleDetector48x96();
+ +
+ +            static std::vector<float> getPeopleDetector64x128();
+ +
+ +
+ +
+ +            void detect(const oclMat &img, std::vector<Point> &found_locations,
+ +
+ +                        double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                        Size padding = Size());
+ +
+ +
+ +
+ +            void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+ +
+ +                                  double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                                  Size padding = Size(), double scale0 = 1.05,
+ +
+ +                                  int group_threshold = 2);
+ +
+ +
+ +
+ +            void getDescriptors(const oclMat &img, Size win_stride,
+ +
+ +                                oclMat &descriptors,
+ +
+ +                                int descr_format = DESCR_FORMAT_COL_BY_COL);
+ +
+ +
+ +
+ +            Size win_size;
+ +
+ +            Size block_size;
+ +
+ +            Size block_stride;
+ +
+ +            Size cell_size;
+ +
+ +            int nbins;
+ +
+ +            double win_sigma;
+ +
+ +            double threshold_L2hys;
+ +
+ +            bool gamma_correction;
+ +
+ +            int nlevels;
+ +
+ +
+ +
+ +        protected:
+ +
+ +            // initialize buffers; only need to do once in case of multiscale detection
+ +
+ +            void init_buffer(const oclMat &img, Size win_stride);
+ +
+ +
+ +
+ +            void computeBlockHistograms(const oclMat &img);
+ +
+ +            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+ +
+ +
+ +
+ +            double getWinSigma() const;
+ +
+ +            bool checkDetectorSize() const;
+ +
+ +
+ +
+ +            static int numPartsWithin(int size, int part_size, int stride);
+ +
+ +            static Size numPartsWithin(Size size, Size part_size, Size stride);
+ +
+ +
+ +
+ +            // Coefficients of the separating plane
+ +
+ +            float free_coef;
+ +
+ +            oclMat detector;
+ +
+ +
+ +
+ +            // Results of the last classification step
+ +
+ +            oclMat labels;
+ +
+ +            Mat labels_host;
+ +
+ +
+ +
+ +            // Results of the last histogram evaluation step
+ +
+ +            oclMat block_hists;
+ +
+ +
+ +
+ +            // Gradients conputation results
+ +
+ +            oclMat grad, qangle;
+ +
+ +
+ +
+ +            // scaled image
+ +
+ +            oclMat image_scale;
+ +
+ +
+ +
+ +            // effect size of input image (might be different from original size after scaling)
+ +
+ +            Size effect_size;
+ +
+ +        };
+ +
+ +
+ +        ////////////////////////feature2d_ocl/////////////////
+ +        /****************************************************************************************\
+ +        *                                      Distance                                          *
+ +        \****************************************************************************************/
- 
+ +        template<typename T>
+ +        struct CV_EXPORTS Accumulator
+ +        {
+ +            typedef T Type;
+ +        };
- 
+ +        template<> struct Accumulator<unsigned char>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<unsigned short>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<char>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<short>
+ +        {
+ +            typedef float Type;
+ +        };
+ +
+ +        /*
+ +         * Manhattan distance (city block distance) functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L1
+ +        {
+ +            enum { normType = NORM_L1 };
+ +            typedef T ValueType;
+ +            typedef typename Accumulator<T>::Type ResultType;
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return normL1<ValueType, ResultType>(a, b, size);
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Euclidean distance functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L2
+ +        {
+ +            enum { normType = NORM_L2 };
+ +            typedef T ValueType;
+ +            typedef typename Accumulator<T>::Type ResultType;
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ +         * bit count of A exclusive XOR'ed with B
+ +         */
+ +        struct CV_EXPORTS Hamming
+ +        {
+ +            enum { normType = NORM_HAMMING };
+ +            typedef unsigned char ValueType;
+ +            typedef int ResultType;
+ +
+ +            /** this will count the bits in a ^ b
+ +             */
+ +            ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ +            {
+ +                return normHamming(a, b, size);
+ +            }
+ +        };
+ +
+ +        ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+ +
+ +        class CV_EXPORTS BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            enum DistType {L1Dist = 0, L2Dist, HammingDist};
- 
- 
+ +            explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+ +
- 
+ +            // Add descriptors to train descriptor collection
- 
- 
+ +            void add(const std::vector<oclMat> &descCollection);
+ +
- 
+ +            // Get train descriptors collection
- 
- 
+ +            const std::vector<oclMat> &getTrainDescriptors() const;
+ +
- 
+ +            // Clear train descriptors collection
- 
- 
+ +            void clear();
+ +
- 
+ +            // Return true if there are not train descriptors in collection
- 
- 
+ +            bool empty() const;
+ +
- 
+ +            // Return true if the matcher supports mask in match methods
- 
- 
+ +            bool isMaskSupported() const;
+ +
- 
+ +            // Find one best match for each query descriptor
- 
+ +            void matchSingle(const oclMat &query, const oclMat &train,
- 
+ +                             oclMat &trainIdx, oclMat &distance,
- 
- 
+ +                             const oclMat &mask = oclMat());
+ +
- 
+ +            // Download trainIdx and distance and convert it to CPU vector with DMatch
- 
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
- 
+ +            // Convert trainIdx and distance to vector with DMatch
- 
- 
+ +            static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
- 
+ +            // Find one best match for each query descriptor
- 
- 
+ +            void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+ +
- 
+ +            // Make gpu collection of trains and masks in suitable format for matchCollection function
- 
- 
+ +            void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
- 
+ +            // Find one best match from train collection for each query descriptor
- 
+ +            void matchCollection(const oclMat &query, const oclMat &trainCollection,
- 
+ +                                 oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- 
- 
+ +                                 const oclMat &masks = oclMat());
+ +
- 
+ +            // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
- 
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
- 
+ +            // Convert trainIdx, imgIdx and distance to vector with DMatch
- 
- 
+ +            static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
- 
+ +            // Find one best match from train collection for each query descriptor.
- 
- 
+ +            void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
- 
+ +            // Find k best matches for each query descriptor (in increasing order of distances)
- 
+ +            void knnMatchSingle(const oclMat &query, const oclMat &train,
- 
+ +                                oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
- 
- 
+ +                                const oclMat &mask = oclMat());
+ +
- 
+ +            // Download trainIdx and distance and convert it to vector with DMatch
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
- 
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- 
+ +            // Convert trainIdx and distance to vector with DMatch
- 
+ +            static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
- 
- 
+ +                                        std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
- 
+ +            // Find k best matches for each query descriptor (in increasing order of distances).
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            void knnMatch(const oclMat &query, const oclMat &train,
- 
+ +                          std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
- 
- 
+ +                          bool compactResult = false);
+ +
- 
+ +            // Find k best matches from train collection for each query descriptor (in increasing order of distances)
- 
+ +            void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
- 
+ +                                     oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- 
- 
+ +                                     const oclMat &maskCollection = oclMat());
+ +
- 
+ +            // Download trainIdx and distance and convert it to vector with DMatch
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
- 
+ +                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- 
+ +            // Convert trainIdx and distance to vector with DMatch
- 
+ +            static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
- 
- 
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
- 
+ +            // Find k best matches  for each query descriptor (in increasing order of distances).
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
- 
- 
+ +                          const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
- 
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
- 
+ +            // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
- 
+ +            // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
- 
+ +            // because it didn't have enough memory.
- 
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
- 
+ +            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
- 
+ +            // Matches doesn't sorted.
- 
+ +            void radiusMatchSingle(const oclMat &query, const oclMat &train,
- 
+ +                                   oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- 
- 
+ +                                   const oclMat &mask = oclMat());
+ +
- 
+ +            // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
- 
+ +            // matches will be sorted in increasing order of distances.
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
- 
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- 
+ +            // Convert trainIdx, nMatches and distance to vector with DMatch.
- 
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
- 
- 
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
- 
+ +            // Find best matches for each query descriptor which have distance less than maxDistance
- 
+ +            // in increasing order of distances).
- 
+ +            void radiusMatch(const oclMat &query, const oclMat &train,
- 
+ +                             std::vector< std::vector<DMatch> > &matches, float maxDistance,
- 
- 
+ +                             const oclMat &mask = oclMat(), bool compactResult = false);
+ +
- 
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
- 
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
- 
+ +            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
- 
+ +            // Matches doesn't sorted.
- 
+ +            void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- 
- 
+ +                                       const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
- 
+ +            // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
- 
+ +            // matches will be sorted in increasing order of distances.
- 
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
- 
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
- 
+ +            // matches vector will not contain matches for fully masked out query descriptors.
- 
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
- 
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- 
+ +            // Convert trainIdx, nMatches and distance to vector with DMatch.
- 
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
- 
- 
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
- 
+ +            // Find best matches from train collection for each query descriptor which have distance less than
- 
+ +            // maxDistance (in increasing order of distances).
- 
+ +            void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
- 
- 
+ +                             const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
- 
- 
+ +            DistType distType;
+ +
- 
+ +        private:
- 
+ +            std::vector<oclMat> trainDescCollection;
- 
- 
+ +        };
+ +
- 
+ +        template <class Distance>
- 
- 
+ +        class CV_EXPORTS BruteForceMatcher_OCL;
+ +
- 
+ +        template <typename T>
- 
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
- 
+ +        {
- 
+ +        public:
- 
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
- 
+ +            explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
- 
+ +        };
- 
+ +        template <typename T>
- 
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
- 
+ +        {
- 
+ +        public:
- 
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
- 
+ +            explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
- 
+ +        };
- 
+ +        template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
- 
+ +        {
- 
+ +        public:
- 
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
- 
+ +            explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
- 
+ +        };
+ +
- 
++        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
++        {
++        public:
++            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
++        };
+ +
+ +        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ +
+ +        class CV_EXPORTS PyrLKOpticalFlow
- 
+ +        {
- 
+ +        public:
- 
+ +            PyrLKOpticalFlow()
- 
+ +            {
- 
+ +                winSize = Size(21, 21);
- 
+ +                maxLevel = 3;
- 
+ +                iters = 30;
- 
+ +                derivLambda = 0.5;
- 
+ +                useInitialFlow = false;
- 
+ +                minEigThreshold = 1e-4f;
- 
+ +                getMinEigenVals = false;
- 
+ +                isDeviceArch11_ = false;
- 
- 
+ +            }
+ +
- 
+ +            void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
- 
- 
+ +                        oclMat &status, oclMat *err = 0);
+ +
- 
- 
+ +            void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
+ +
- 
+ +            Size winSize;
- 
+ +            int maxLevel;
- 
+ +            int iters;
- 
+ +            double derivLambda;
- 
+ +            bool useInitialFlow;
- 
+ +            float minEigThreshold;
- 
- 
+ +            bool getMinEigenVals;
+ +
- 
+ +            void releaseMemory()
- 
+ +            {
- 
+ +                dx_calcBuf_.release();
- 
- 
+ +                dy_calcBuf_.release();
+ +
- 
+ +                prevPyr_.clear();
- 
- 
+ +                nextPyr_.clear();
+ +
- 
+ +                dx_buf_.release();
- 
+ +                dy_buf_.release();
- 
- 
+ +            }
+ +
- 
+ +        private:
- 
- 
+ +            void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+ +
- 
- 
+ +            void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+ +
- 
+ +            oclMat dx_calcBuf_;
- 
- 
+ +            oclMat dy_calcBuf_;
+ +
- 
+ +            std::vector<oclMat> prevPyr_;
- 
- 
+ +            std::vector<oclMat> nextPyr_;
+ +
- 
+ +            oclMat dx_buf_;
- 
- 
+ +            oclMat dy_buf_;
+ +
- 
+ +            oclMat uPyr_[2];
- 
- 
+ +            oclMat vPyr_[2];
+ +
- 
+ +            bool isDeviceArch11_;
+ +        };
+ +        //////////////// build warping maps ////////////////////
+ +        //! builds plane warping maps
+ +        CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds cylindrical warping maps
+ +        CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds spherical warping maps
+ +        CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds Affine warping maps
+ +        CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        //! builds Perspective warping maps
+ +        CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ +        //! Interpolate frames (images) using provided optical flow (displacement field).
+ +        //! frame0   - frame 0 (32-bit floating point images, single channel)
+ +        //! frame1   - frame 1 (the same type and size)
+ +        //! fu       - forward horizontal displacement
+ +        //! fv       - forward vertical displacement
+ +        //! bu       - backward horizontal displacement
+ +        //! bv       - backward vertical displacement
+ +        //! pos      - new frame position
+ +        //! newFrame - new frame
+ +        //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
+ +        //!            occlusion masks            0, occlusion masks            1,
+ +        //!            interpolated forward flow  0, interpolated forward flow  1,
+ +        //!            interpolated backward flow 0, interpolated backward flow 1
+ +        //!
+ +        CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ +                                          const oclMat &fu, const oclMat &fv,
+ +                                          const oclMat &bu, const oclMat &bv,
+ +                                          float pos, oclMat &newFrame, oclMat &buf);
+ +
+ +        //! computes moments of the rasterized shape or a vector of points
+ +        CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
+ +
+ +        class CV_EXPORTS StereoBM_OCL
+ +        {
+ +        public:
+ +            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+ +
+ +            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+ +
+ +            //! the default constructor
+ +            StereoBM_OCL();
+ +            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
+ +            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+ +
+ +            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+ +            //! Output disparity has CV_8U type.
+ +            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+ +
+ +            //! Some heuristics that tries to estmate
+ +            // if current GPU will be faster then CPU in this algorithm.
+ +            // It queries current active device.
+ +            static bool checkIfGpuCallReasonable();
+ +
+ +            int preset;
+ +            int ndisp;
+ +            int winSize;
+ +
+ +            // If avergeTexThreshold  == 0 => post procesing is disabled
+ +            // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
+ +            // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
+ +            // i.e. input left image is low textured.
+ +            float avergeTexThreshold;
+ +        private:
+ +            oclMat minSSD, leBuf, riBuf;
+ +        };
++
+ +        class CV_EXPORTS StereoBeliefPropagation
+ +        {
+ +        public:
+ +            enum { DEFAULT_NDISP  = 64 };
+ +            enum { DEFAULT_ITERS  = 5  };
+ +            enum { DEFAULT_LEVELS = 5  };
+ +            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
+ +            explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
+ +                                             int iters  = DEFAULT_ITERS,
+ +                                             int levels = DEFAULT_LEVELS,
+ +                                             int msg_type = CV_16S);
+ +            StereoBeliefPropagation(int ndisp, int iters, int levels,
+ +                                    float max_data_term, float data_weight,
+ +                                    float max_disc_term, float disc_single_jump,
+ +                                    int msg_type = CV_32F);
+ +            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ +            void operator()(const oclMat &data, oclMat &disparity);
+ +            int ndisp;
+ +            int iters;
+ +            int levels;
+ +            float max_data_term;
+ +            float data_weight;
+ +            float max_disc_term;
+ +            float disc_single_jump;
+ +            int msg_type;
+ +        private:
+ +            oclMat u, d, l, r, u2, d2, l2, r2;
+ +            std::vector<oclMat> datas;
+ +            oclMat out;
+ +        };
++
++        class CV_EXPORTS StereoConstantSpaceBP
++        {
++        public:
++            enum { DEFAULT_NDISP    = 128 };
++            enum { DEFAULT_ITERS    = 8   };
++            enum { DEFAULT_LEVELS   = 4   };
++            enum { DEFAULT_NR_PLANE = 4   };
++            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
++            explicit StereoConstantSpaceBP(
++                int ndisp    = DEFAULT_NDISP,
++                int iters    = DEFAULT_ITERS,
++                int levels   = DEFAULT_LEVELS,
++                int nr_plane = DEFAULT_NR_PLANE,
++                int msg_type = CV_32F);
++            StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
++                float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
++                int min_disp_th = 0,
++                int msg_type = CV_32F);
++            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
++            int ndisp;
++            int iters;
++            int levels;
++            int nr_plane;
++            float max_data_term;
++            float data_weight;
++            float max_disc_term;
++            float disc_single_jump;
++            int min_disp_th;
++            int msg_type;
++            bool use_local_init_data_cost;
++        private:
++            oclMat u[2], d[2], l[2], r[2];
++            oclMat disp_selected_pyr[2];
++            oclMat data_cost;
++            oclMat data_cost_selected;
++            oclMat temp;
++            oclMat out;
++        };
++
++        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
++        //
++        // see reference:
++        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
++        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
++        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
++        {
++        public:
++            OpticalFlowDual_TVL1_OCL();
++
++            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
++
++            void collectGarbage();
++
++            /**
++            * Time step of the numerical scheme.
++            */
++            double tau;
++
++            /**
++            * Weight parameter for the data term, attachment parameter.
++            * This is the most relevant parameter, which determines the smoothness of the output.
++            * The smaller this parameter is, the smoother the solutions we obtain.
++            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
++            */
++            double lambda;
++
++            /**
++            * Weight parameter for (u - v)^2, tightness parameter.
++            * It serves as a link between the attachment and the regularization terms.
++            * In theory, it should have a small value in order to maintain both parts in correspondence.
++            * The method is stable for a large range of values of this parameter.
++            */
++            double theta;
++
++            /**
++            * Number of scales used to create the pyramid of images.
++            */
++            int nscales;
++
++            /**
++            * Number of warpings per scale.
++            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
++            * This is a parameter that assures the stability of the method.
++            * It also affects the running time, so it is a compromise between speed and accuracy.
++            */
++            int warps;
++
++            /**
++            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
++            * A small value will yield more accurate solutions at the expense of a slower convergence.
++            */
++            double epsilon;
++
++            /**
++            * Stopping criterion iterations number used in the numerical scheme.
++            */
++            int iterations;
++
++            bool useInitialFlow;
++
++        private:
++            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
++
++            std::vector<oclMat> I0s;
++            std::vector<oclMat> I1s;
++            std::vector<oclMat> u1s;
++            std::vector<oclMat> u2s;
++
++            oclMat I1x_buf;
++            oclMat I1y_buf;
++
++            oclMat I1w_buf;
++            oclMat I1wx_buf;
++            oclMat I1wy_buf;
++
++            oclMat grad_buf;
++            oclMat rho_c_buf;
++
++            oclMat p11_buf;
++            oclMat p12_buf;
++            oclMat p21_buf;
++            oclMat p22_buf;
++
++            oclMat diff_buf;
++            oclMat norm_buf;
++        };
+ +    }
+ +}
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( push)
+ +#  pragma warning( disable: 4267)
+ +#endif
+ +#include "opencv2/ocl/matrix_operations.hpp"
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( pop)
+ +#endif
+ +
+ +#endif /* __OPENCV_OCL_HPP__ */
diff --cc modules/ocl/include/opencv2/ocl/ocl.hpp

index eb8ff42,1cace84..3dd4654
--- 1/modules/ocl/include/opencv2/ocl/ocl.hpp
--- 2/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@@ -41,8 -41,1507 +41,8 @@@
   //
   //M*/
   
- -#ifndef __OPENCV_OCL_HPP__
- -#define __OPENCV_OCL_HPP__
- -
- -#include <memory>
- -#include <vector>
- -
- -#include "opencv2/core/core.hpp"
- -#include "opencv2/imgproc/imgproc.hpp"
- -#include "opencv2/objdetect/objdetect.hpp"
- -#include "opencv2/features2d/features2d.hpp"
- -
- -namespace cv
- -{
- -    namespace ocl
- -    {
- -        using std::auto_ptr;
- -        enum
- -        {
- -            CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
- -            CVCL_DEVICE_TYPE_CPU         = (1 << 1),
- -            CVCL_DEVICE_TYPE_GPU         = (1 << 2),
- -            CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
- -            //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
- -            CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
- -        };
- -
- -        enum DevMemRW
- -        {
- -            DEVICE_MEM_R_W = 0,
- -            DEVICE_MEM_R_ONLY,
- -            DEVICE_MEM_W_ONLY
- -        };
- -
- -        enum DevMemType
- -        {
- -            DEVICE_MEM_DEFAULT = 0,
- -            DEVICE_MEM_AHP,         //alloc host pointer
- -            DEVICE_MEM_UHP,         //use host pointer
- -            DEVICE_MEM_CHP,         //copy host pointer
- -            DEVICE_MEM_PM           //persistent memory
- -        };
- -
- -        //Get the global device memory and read/write type
- -        //return 1 if unified memory system supported, otherwise return 0
- -        CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
- -
- -        //Set the global device memory and read/write type,
- -        //the newly generated oclMat will all use this type
- -        //return -1 if the target type is unsupported, otherwise return 0
- -        CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
- -
- -        //this class contains ocl runtime information
- -        class CV_EXPORTS Info
- -        {
- -        public:
- -            struct Impl;
- -            Impl *impl;
- -
- -            Info();
- -            Info(const Info &m);
- -            ~Info();
- -            void release();
- -            Info &operator = (const Info &m);
- -            std::vector<string> DeviceName;
- -        };
- -        //////////////////////////////// Initialization & Info ////////////////////////
- -        //this function may be obsoleted
- -        //CV_EXPORTS cl_device_id getDevice();
- -        //the function must be called before any other cv::ocl::functions, it initialize ocl runtime
- -        //each Info relates to an OpenCL platform
- -        //there is one or more devices in each platform, each one has a separate name
- -        CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
- -
- -        //set device you want to use, optional function after getDevice be called
- -        //the devnum is the index of the selected device in DeviceName vector of INfo
- -        CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
- -
- -        //optional function, if you want save opencl binary kernel to the file, set its path
- -        CV_EXPORTS  void setBinpath(const char *path);
- -
- -        //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
- -        CV_EXPORTS void* getoclContext();
- -
- -        CV_EXPORTS void* getoclCommandQueue();
- -
- -        //explicit call clFinish. The global command queue will be used.
- -        CV_EXPORTS void finish();
- -
- -        //this function enable ocl module to use customized cl_context and cl_command_queue
- -        //getDevice also need to be called before this function
- -        CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
- -
- -        //////////////////////////////// Error handling ////////////////////////
- -        CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
- -
- -        //////////////////////////////// OpenCL context ////////////////////////
- -        //This is a global singleton class used to represent a OpenCL context.
- -        class CV_EXPORTS Context
- -        {
- -        protected:
- -            Context();
- -            friend class auto_ptr<Context>;
- -
- -        private:
- -            static auto_ptr<Context> clCxt;
- -            static int val;
- -        public:
- -            ~Context();
- -            void release();
- -            Info::Impl* impl;
- -
- -            static Context* getContext();
- -            static void setContext(Info &oclinfo);
- -
- -            enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
- -            bool supportsFeature(int ftype);
- -            size_t computeUnits();
- -            void* oclContext();
- -            void* oclCommandQueue();
- -        };
- -
- -        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
- -        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
- -                                                        const char **source, string kernelName,
- -                                                        size_t globalThreads[3], size_t localThreads[3],
- -                                                        std::vector< std::pair<size_t, const void *> > &args,
- -                                                        int channels, int depth, const char *build_options,
- -                                                        bool finish = true, bool measureKernelTime = false,
- -                                                        bool cleanUp = true);
- -
- -        //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
- -        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
- -                                                        const char **fileName, const int numFiles, string kernelName,
- -                                                        size_t globalThreads[3], size_t localThreads[3],
- -                                                        std::vector< std::pair<size_t, const void *> > &args,
- -                                                        int channels, int depth, const char *build_options,
- -                                                        bool finish = true, bool measureKernelTime = false,
- -                                                        bool cleanUp = true);
- -
- -        class CV_EXPORTS oclMatExpr;
- -        //////////////////////////////// oclMat ////////////////////////////////
- -        class CV_EXPORTS oclMat
- -        {
- -        public:
- -            //! default constructor
- -            oclMat();
- -            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
- -            oclMat(int rows, int cols, int type);
- -            oclMat(Size size, int type);
- -            //! constucts oclMatrix and fills it with the specified value _s.
- -            oclMat(int rows, int cols, int type, const Scalar &s);
- -            oclMat(Size size, int type, const Scalar &s);
- -            //! copy constructor
- -            oclMat(const oclMat &m);
- -
- -            //! constructor for oclMatrix headers pointing to user-allocated data
- -            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
- -            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
- -
- -            //! creates a matrix header for a part of the bigger matrix
- -            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
- -            oclMat(const oclMat &m, const Rect &roi);
- -
- -            //! builds oclMat from Mat. Perfom blocking upload to device.
- -            explicit oclMat (const Mat &m);
- -
- -            //! destructor - calls release()
- -            ~oclMat();
- -
- -            //! assignment operators
- -            oclMat &operator = (const oclMat &m);
- -            //! assignment operator. Perfom blocking upload to device.
- -            oclMat &operator = (const Mat &m);
- -            oclMat &operator = (const oclMatExpr& expr);
- -
- -            //! pefroms blocking upload data to oclMat.
- -            void upload(const cv::Mat &m);
- -
- -
- -            //! downloads data from device to host memory. Blocking calls.
- -            operator Mat() const;
- -            void download(cv::Mat &m) const;
- -
- -
- -            //! returns a new oclMatrix header for the specified row
- -            oclMat row(int y) const;
- -            //! returns a new oclMatrix header for the specified column
- -            oclMat col(int x) const;
- -            //! ... for the specified row span
- -            oclMat rowRange(int startrow, int endrow) const;
- -            oclMat rowRange(const Range &r) const;
- -            //! ... for the specified column span
- -            oclMat colRange(int startcol, int endcol) const;
- -            oclMat colRange(const Range &r) const;
- -
- -            //! returns deep copy of the oclMatrix, i.e. the data is copied
- -            oclMat clone() const;
- -            //! copies the oclMatrix content to "m".
- -            // It calls m.create(this->size(), this->type()).
- -            // It supports any data type
- -            void copyTo( oclMat &m ) const;
- -            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
- -            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- -            void copyTo( oclMat &m, const oclMat &mask ) const;
- -            //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale.
- -            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- -            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
- -
- -            void assignTo( oclMat &m, int type = -1 ) const;
- -
- -            //! sets every oclMatrix element to s
- -            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- -            oclMat& operator = (const Scalar &s);
- -            //! sets some of the oclMatrix elements to s, according to the mask
- -            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
- -            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
- -            //! creates alternative oclMatrix header for the same data, with different
- -            // number of channels and/or different number of rows. see cvReshape.
- -            oclMat reshape(int cn, int rows = 0) const;
- -
- -            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
- -            // previous data is unreferenced if needed.
- -            void create(int rows, int cols, int type);
- -            void create(Size size, int type);
- -
- -            //! allocates new oclMatrix with specified device memory type.
- -            void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
- -            void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
- -
- -            //! decreases reference counter;
- -            // deallocate the data when reference counter reaches 0.
- -            void release();
- -
- -            //! swaps with other smart pointer
- -            void swap(oclMat &mat);
- -
- -            //! locates oclMatrix header within a parent oclMatrix. See below
- -            void locateROI( Size &wholeSize, Point &ofs ) const;
- -            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
- -            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
- -            //! extracts a rectangular sub-oclMatrix
- -            // (this is a generalized form of row, rowRange etc.)
- -            oclMat operator()( Range rowRange, Range colRange ) const;
- -            oclMat operator()( const Rect &roi ) const;
- -
- -            oclMat& operator+=( const oclMat& m );
- -            oclMat& operator-=( const oclMat& m );
- -            oclMat& operator*=( const oclMat& m );
- -            oclMat& operator/=( const oclMat& m );
- -
- -            //! returns true if the oclMatrix data is continuous
- -            // (i.e. when there are no gaps between successive rows).
- -            // similar to CV_IS_oclMat_CONT(cvoclMat->type)
- -            bool isContinuous() const;
- -            //! returns element size in bytes,
- -            // similar to CV_ELEM_SIZE(cvMat->type)
- -            size_t elemSize() const;
- -            //! returns the size of element channel in bytes.
- -            size_t elemSize1() const;
- -            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
- -            int type() const;
- -            //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
- -            //! 3 channels element actually use 4 channel space
- -            int ocltype() const;
- -            //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
- -            int depth() const;
- -            //! returns element type, similar to CV_MAT_CN(cvMat->type)
- -            int channels() const;
- -            //! returns element type, return 4 for 3 channels element,
- -            //!becuase 3 channels element actually use 4 channel space
- -            int oclchannels() const;
- -            //! returns step/elemSize1()
- -            size_t step1() const;
- -            //! returns oclMatrix size:
- -            // width == number of columns, height == number of rows
- -            Size size() const;
- -            //! returns true if oclMatrix data is NULL
- -            bool empty() const;
- -
- -            //! returns pointer to y-th row
- -            uchar* ptr(int y = 0);
- -            const uchar *ptr(int y = 0) const;
- -
- -            //! template version of the above method
- -            template<typename _Tp> _Tp *ptr(int y = 0);
- -            template<typename _Tp> const _Tp *ptr(int y = 0) const;
- -
- -            //! matrix transposition
- -            oclMat t() const;
- -
- -            /*! includes several bit-fields:
- -              - the magic signature
- -              - continuity flag
- -              - depth
- -              - number of channels
- -              */
- -            int flags;
- -            //! the number of rows and columns
- -            int rows, cols;
- -            //! a distance between successive rows in bytes; includes the gap if any
- -            size_t step;
- -            //! pointer to the data(OCL memory object)
- -            uchar *data;
- -
- -            //! pointer to the reference counter;
- -            // when oclMatrix points to user-allocated data, the pointer is NULL
- -            int *refcount;
- -
- -            //! helper fields used in locateROI and adjustROI
- -            //datastart and dataend are not used in current version
- -            uchar *datastart;
- -            uchar *dataend;
- -
- -            //! OpenCL context associated with the oclMat object.
- -            Context *clCxt;
- -            //add offset for handle ROI, calculated in byte
- -            int offset;
- -            //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
- -            int wholerows;
- -            int wholecols;
- -        };
- -
- -
- -        ///////////////////// mat split and merge /////////////////////////////////
- -        //! Compose a multi-channel array from several single-channel arrays
- -        // Support all types
- -        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
- -        CV_EXPORTS void merge(const vector<oclMat> &src, oclMat &dst);
- -
- -        //! Divides multi-channel array into several single-channel arrays
- -        // Support all types
- -        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
- -        CV_EXPORTS void split(const oclMat &src, vector<oclMat> &dst);
- -
- -        ////////////////////////////// Arithmetics ///////////////////////////////////
- -        //#if defined DOUBLE_SUPPORT
- -        //typedef double F;
- -        //#else
- -        //typedef float F;
- -        //#endif
- -        //    CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
- -        CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
- -        //! adds one matrix to another (c = a + b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
- -        //! adds one matrix to another (c = a + b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
- -        //! adds scalar to a matrix (c = a + s)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
- -        //! subtracts one matrix from another (c = a - b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
- -        //! subtracts one matrix from another (c = a - b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
- -        //! subtracts scalar from a matrix (c = a - s)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
- -        //! subtracts scalar from a matrix (c = a - s)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
- -        //! computes element-wise product of the two arrays (c = a * b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
- -        //! multiplies matrix to a number (dst = scalar * src)
- -        // supports CV_32FC1 only
- -        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
- -        //! computes element-wise quotient of the two arrays (c = a / b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
- -        //! computes element-wise quotient of the two arrays (c = a / b)
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
- -
- -        //! compares elements of two arrays (c = a <cmpop> b)
- -        // supports except CV_8SC1,CV_8SC2,CV8SC3,CV_8SC4 types
- -        CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
- -
- -        //! transposes the matrix
- -        // supports  CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda)
- -        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
- -
- -        //! computes element-wise absolute difference of two arrays (c = abs(a - b))
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
- -        //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
- -        // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
- -        CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
- -
- -        //! computes mean value and standard deviation of all or selected array elements
- -        // supports except CV_32F,CV_64F
- -        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
- -
- -        //! computes norm of array
- -        // supports NORM_INF, NORM_L1, NORM_L2
- -        // supports only CV_8UC1 type
- -        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
- -
- -        //! computes norm of the difference between two arrays
- -        // supports NORM_INF, NORM_L1, NORM_L2
- -        // supports only CV_8UC1 type
- -        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
- -
- -        //! reverses the order of the rows, columns or both in a matrix
- -        // supports all types
- -        CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
- -
- -        //! computes sum of array elements
- -        // disabled until fix crash
- -        // support all types
- -        CV_EXPORTS Scalar sum(const oclMat &m);
- -        CV_EXPORTS Scalar absSum(const oclMat &m);
- -        CV_EXPORTS Scalar sqrSum(const oclMat &m);
- -
- -        //! finds global minimum and maximum array elements and returns their values
- -        // support all C1 types
- -
- -        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
- -
- -        //! finds global minimum and maximum array elements and returns their values with locations
- -        // support all C1 types
- -
- -        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
- -                                  const oclMat &mask = oclMat());
- -
- -        //! counts non-zero array elements
- -        // support all types
- -        CV_EXPORTS int countNonZero(const oclMat &src);
- -
- -        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
- -        // destination array will have the depth type as lut and the same channels number as source
- -        //It supports 8UC1 8UC4 only
- -        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
- -
- -        //! only 8UC1 and 256 bins is supported now
- -        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
- -        //! only 8UC1 and 256 bins is supported now
- -        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
- -        //! bilateralFilter
- -        // supports 8UC1 8UC4
- -        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
- -        //! computes exponent of each matrix element (b = e**a)
- -        // supports only CV_32FC1 type
- -        CV_EXPORTS void exp(const oclMat &a, oclMat &b);
- -
- -        //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
- -        // supports only CV_32FC1 type
- -        CV_EXPORTS void log(const oclMat &a, oclMat &b);
- -
- -        //! computes magnitude of each (x(i), y(i)) vector
- -        // supports only CV_32F CV_64F type
- -        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
- -        CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
- -
- -        CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
- -
- -        //! computes angle (angle(i)) of each (x(i), y(i)) vector
- -        // supports only CV_32F CV_64F type
- -        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
- -
- -        //! the function raises every element of tne input array to p
- -        //! support only CV_32F CV_64F type
- -        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
- -
- -        //! converts Cartesian coordinates to polar
- -        // supports only CV_32F CV_64F type
- -        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
- -
- -        //! converts polar coordinates to Cartesian
- -        // supports only CV_32F CV_64F type
- -        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
- -
- -        //! perfroms per-elements bit-wise inversion
- -        // supports all types
- -        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
- -        //! calculates per-element bit-wise disjunction of two arrays
- -        // supports all types
- -        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- -        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- -        //! calculates per-element bit-wise conjunction of two arrays
- -        // supports all types
- -        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- -        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- -        //! calculates per-element bit-wise "exclusive or" operation
- -        // supports all types
- -        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
- -        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- -
- -        //! Logical operators
- -        CV_EXPORTS oclMat operator ~ (const oclMat &);
- -        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
- -        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
- -        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
- -
- -
- -        //! Mathematics operators
- -        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
- -        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
- -        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
- -        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
- -
- -        //! computes convolution of two images
- -        //! support only CV_32FC1 type
- -        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
- -
- -        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
- -
- -        //////////////////////////////// Filter Engine ////////////////////////////////
- -
- -        /*!
- -          The Base Class for 1D or Row-wise Filters
- -
- -          This is the base class for linear or non-linear filters that process 1D data.
- -          In particular, such filters are used for the "horizontal" filtering parts in separable filters.
- -          */
- -        class CV_EXPORTS BaseRowFilter_GPU
- -        {
- -        public:
- -            BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- -            virtual ~BaseRowFilter_GPU() {}
- -            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- -            int ksize, anchor, bordertype;
- -        };
- -
- -        /*!
- -          The Base Class for Column-wise Filters
- -
- -          This is the base class for linear or non-linear filters that process columns of 2D arrays.
- -          Such filters are used for the "vertical" filtering parts in separable filters.
- -          */
- -        class CV_EXPORTS BaseColumnFilter_GPU
- -        {
- -        public:
- -            BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- -            virtual ~BaseColumnFilter_GPU() {}
- -            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- -            int ksize, anchor, bordertype;
- -        };
- -
- -        /*!
- -          The Base Class for Non-Separable 2D Filters.
- -
- -          This is the base class for linear or non-linear 2D filters.
- -          */
- -        class CV_EXPORTS BaseFilter_GPU
- -        {
- -        public:
- -            BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
- -                : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
- -            virtual ~BaseFilter_GPU() {}
- -            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- -            Size ksize;
- -            Point anchor;
- -            int borderType;
- -        };
- -
- -        /*!
- -          The Base Class for Filter Engine.
- -
- -          The class can be used to apply an arbitrary filtering operation to an image.
- -          It contains all the necessary intermediate buffers.
- -          */
- -        class CV_EXPORTS FilterEngine_GPU
- -        {
- -        public:
- -            virtual ~FilterEngine_GPU() {}
- -
- -            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
- -        };
- -
- -        //! returns the non-separable filter engine with the specified filter
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
- -
- -        //! returns the primitive row filter with the specified kernel
- -        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
- -                int anchor = -1, int bordertype = BORDER_DEFAULT);
- -
- -        //! returns the primitive column filter with the specified kernel
- -        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
- -                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
- -
- -        //! returns the separable linear filter engine
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
- -                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
- -
- -        //! returns the separable filter engine with the specified filters
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
- -                const Ptr<BaseColumnFilter_GPU> &columnFilter);
- -
- -        //! returns the Gaussian filter engine
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
- -
- -        //! returns filter engine for the generalized Sobel operator
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
- -
- -        //! applies Laplacian operator to the image
- -        // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
- -        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
- -
- -        //! returns a 2D box filter
- -        // supports CV_8UC1 and CV_8UC4 source types; the dst type must be the same as the source type
- -        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
- -                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! returns the box filter engine
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
- -                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! returns a 2D filter with the specified kernel
- -        // supports CV_8UC1 and CV_8UC4 types
- -        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
- -                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! returns the non-separable linear filter engine
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
- -                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! smooths the image using the normalized box filter
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101, BORDER_WRAP
- -        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
- -                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! returns a 2D morphological filter
- -        //! only MORPH_ERODE and MORPH_DILATE are supported
- -        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- -        // kernel must have CV_8UC1 type, a single row, and cols == ksize.width * ksize.height
- -        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
- -                Point anchor = Point(-1, -1));
- -
- -        //! returns the morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
- -        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
- -                const Point &anchor = Point(-1, -1), int iterations = 1);
- -
- -        //! a synonym for the normalized box filter: forwards to boxFilter with ddepth = -1
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
- -        static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
- -                                int borderType = BORDER_CONSTANT) // note: boxFilter itself declares BORDER_DEFAULT as its default
- -        {
- -            boxFilter(src, dst, -1, ksize, anchor, borderType);
- -        }
- -
- -        //! applies a non-separable 2D linear filter to the image
- -        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
- -                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
- -
- -        //! applies a separable 2D linear filter (kernelX, kernelY) to the image
- -        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
- -                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
- -
- -        //! applies the generalized Sobel operator to the image
- -        // dst type must equal src type
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
- -        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
- -
- -        //! applies the vertical or horizontal Scharr operator to the image
- -        // dst type must equal src type
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
- -        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
- -
- -        //! smooths the image using a Gaussian filter.
- -        // dst type must equal src type
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        // supports border types: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101
- -        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
- -
- -        //! erodes the image (applies the local minimum operator)
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
- -
- -                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
- -
- -
- -        //! dilates the image (applies the local maximum operator)
- -        // supports data types: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- -        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
- -
- -                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
- -
- -
- -        //! applies an advanced morphological operation to the image; defaults match erode and dilate
- -        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
- -
- -                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
- -
- -
- -        ////////////////////////////// Image processing //////////////////////////////
- -        //! Performs mean shift filtering on the GPU.
- -        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
- -                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
- -
- -        //! Performs the mean shift procedure on the GPU; writes two outputs (dstr and dstsp).
- -        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
- -                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
- -
- -        //! Performs mean shift segmentation with elimination of small regions; the result is written to a host Mat.
- -        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
- -                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
- -
- -        //! applies a fixed threshold to the image.
- -        // supports CV_8UC1 and CV_32FC1 data types
- -        // supports threshold types: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
- -        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
- -
- -        //! resizes the image
- -        // supports INTER_NEAREST and INTER_LINEAR
- -        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- -        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
- -
- -        //! Applies a generic geometrical transformation to an image.
- -
- -        // Supports INTER_NEAREST and INTER_LINEAR.
- -
- -        // map1 supports CV_16SC2 and CV_32FC2 types.
- -
- -        // src supports CV_8UC1, CV_8UC2 and CV_8UC4.
- -
- -        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
- -
- -        //! copies a 2D array to a larger destination array and pads the borders with a user-specifiable constant
- -        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
- -        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
- -
- -        //! Smooths the image using a median filter
- -        // src is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
- -        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
- -
- -        //! warps the image using an affine transformation
- -        // supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
- -        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- -        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
- -
- -        //! warps the image using a perspective transformation
- -        // supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
- -        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
- -        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
- -
- -        //! computes the integral image and the integral of the squared image
- -        // sum will have CV_32S type, sqsum will have CV_32F type
- -        // supports only CV_8UC1 source type
- -        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
- -        CV_EXPORTS void integral(const oclMat &src, oclMat &sum); // overload without the sqsum output
- -        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); // NOTE(review): undocumented in this header
- -        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); // NOTE(review): undocumented in this header
- -
- -        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- -        ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
- -        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- -
- -        class CV_EXPORTS_W OclCascadeClassifier : public  cv::CascadeClassifier // adds an oclMat-based detection entry point to cv::CascadeClassifier
- -        {
- -        public:
- -            OclCascadeClassifier() {};
- -            ~OclCascadeClassifier() {};
- -
- -            CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor, // returns a CvSeq*; takes a caller-supplied CvMemStorage
- -                                        int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
- -        };
- -
- -        class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier // cv::CascadeClassifier subclass that keeps detection state and buffers as members
- -        {
- -        public:
- -            OclCascadeClassifierBuf() :
- -                m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {} // NOTE(review): m_rows, m_cols, m_loopcount, m_nodenum and findBiggestObject are not initialized here
- -
- -            ~OclCascadeClassifierBuf() {} // NOTE(review): does not call release(); confirm that buffers is freed elsewhere
- -
- -            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces, // results are written to faces (marked CV_OUT)
- -                                  double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
- -                                  Size minSize = Size(), Size maxSize = Size());
- -            void release();
- -
- -        private:
- -            void Init(const int rows, const int cols, double scaleFactor, int flags,
- -                      const int outputsz, const size_t localThreads[],
- -                      CvSize minSize, CvSize maxSize);
- -            void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
- -            void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
- -                                         const double scaleFactor, const size_t localThreads[],
- -                                         CvSize minSize, CvSize maxSize);
- -            void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
- -
- -            int m_rows;
- -            int m_cols;
- -            int m_flags;
- -            int m_loopcount;
- -            int m_nodenum;
- -            bool findBiggestObject;
- -            bool initialized; // set to false by the constructor
- -            double m_scaleFactor;
- -            Size m_minSize;
- -            Size m_maxSize;
- -            vector<CvSize> sizev; // unqualified vector here, std::vector in the method signatures above
- -            vector<float> scalev;
- -            oclMat gimg1, gsum, gsqsum;
- -            void * buffers; // opaque pointer; set to NULL by the constructor
- -        };
- -
- -
- -        /////////////////////////////// Pyramid /////////////////////////////////////
- -        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); // NOTE(review): undocumented here; presumably the downsampling counterpart of pyrUp - confirm
- -
- -        //! upsamples the source image and then smooths it
- -        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
- -
- -        //! performs linear blending of two images
- -        //! to avoid accuracy errors, the sum of the weights shouldn't be very close to zero
- -        // supports only CV_8UC1 source type
- -        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
- -
- -        //! computes the vertical sum; supports only CV_32FC1 images
- -        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
- -
- -        ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
- -        struct CV_EXPORTS MatchTemplateBuf // plain aggregate of oclMat buffers; accepted by the matchTemplate overload that takes a MatchTemplateBuf
- -        {
- -            Size user_block_size;
- -            oclMat imagef, templf;
- -            std::vector<oclMat> images;
- -            std::vector<oclMat> image_sums;
- -            std::vector<oclMat> image_sqsums;
- -        };
- -
- -        //! computes the proximity map for the raster template and the image in which the template is searched for
- -        // supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for types 8UC1 and 8UC4
- -        // supports TM_SQDIFF, TM_CCORR for types 32FC1 and 32FC4
- -        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
- -
- -        //! overload of the above that additionally takes a caller-provided MatchTemplateBuf
- -        // supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for types 8UC1 and 8UC4
- -        // supports TM_SQDIFF, TM_CCORR for types 32FC1 and 32FC4
- -        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
- -
- -        ///////////////////////////////////////////// Canny /////////////////////////////////////////////
- -        struct CV_EXPORTS CannyBuf;
- -        //! computes the edges of the input image using the Canny operator; overloads take an image or dx/dy, with or without a CannyBuf
- -        // supports CV_8UC1 only
- -        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- -        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
- -        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
- -        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
- -
- -        struct CV_EXPORTS CannyBuf // buffer object accepted by the Canny overloads that take a CannyBuf
- -        {
- -            CannyBuf() : counter(NULL) {}
- -            ~CannyBuf()
- -            {
- -                release(); // the destructor always releases
- -            }
- -            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
- -            {
- -                create(image_size, apperture_size); // forwards both constructor arguments to create()
- -            }
- -            CannyBuf(const oclMat &dx_, const oclMat &dy_); // defined out of line; not visible in this header
- -
- -            void create(const Size &image_size, int apperture_size = 3);
- -            void release();
- -            oclMat dx, dy;
- -            oclMat dx_buf, dy_buf;
- -            oclMat edgeBuf;
- -            oclMat trackBuf1, trackBuf2;
- -            void *counter; // set to NULL by the two inline constructors. NOTE(review): no copy constructor/assignment is declared although the destructor calls release() - confirm copies are safe
- -            Ptr<FilterEngine_GPU> filterDX, filterDY;
- -        };
- -
- -        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
- -        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating point matrix.
- -        //! Param dft_size is the size of the DFT transform.
- -        //!
- -        //! For the complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
- -        // supports src types CV_32FC1 and CV_32FC2
- -        // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
- -        // dft_size is the size of the original input, which is used for the transformation from complex to real.
- -        // dft_size must be powers of 2, 3 and 5
- -        // real-to-complex dft requires at least v1.8 of clAmdFft
- -        // real-to-complex dft output is not the same as that of the CPU version
- -        // real-to-complex and complex-to-real transforms do not support DFT_ROWS
- -        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
- -
- -        //! implements the generalized matrix product algorithm GEMM from BLAS
- -        // this functionality requires the clAmdBlas library
- -        // only supports type CV_32FC1
- -        // flag GEMM_3_T is not supported
- -        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
- -                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
- -
- -        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
- -        struct CV_EXPORTS HOGDescriptor
- -        {
- -            enum { DEFAULT_WIN_SIGMA = -1 }; // default value of the win_sigma constructor argument
- -            enum { DEFAULT_NLEVELS = 64 }; // default value of the nlevels constructor argument
- -            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; // values accepted by getDescriptors' descr_format
- -            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
- -                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
- -                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
- -                          double threshold_L2hys = 0.2, bool gamma_correction = true,
- -                          int nlevels = DEFAULT_NLEVELS);
- -
- -            size_t getDescriptorSize() const;
- -            size_t getBlockHistogramSize() const;
- -            void setSVMDetector(const vector<float> &detector);
- -            static vector<float> getDefaultPeopleDetector();
- -            static vector<float> getPeopleDetector48x96();
- -            static vector<float> getPeopleDetector64x128();
- -            void detect(const oclMat &img, vector<Point> &found_locations, // reports results as Points
- -                        double hit_threshold = 0, Size win_stride = Size(),
- -                        Size padding = Size());
- -            void detectMultiScale(const oclMat &img, vector<Rect> &found_locations, // reports results as Rects
- -                                  double hit_threshold = 0, Size win_stride = Size(),
- -                                  Size padding = Size(), double scale0 = 1.05,
- -                                  int group_threshold = 2);
- -            void getDescriptors(const oclMat &img, Size win_stride,
- -                                oclMat &descriptors,
- -                                int descr_format = DESCR_FORMAT_COL_BY_COL);
- -            Size win_size;
- -            Size block_size;
- -            Size block_stride;
- -            Size cell_size;
- -
- -            int nbins;
- -            double win_sigma;
- -            double threshold_L2hys;
- -            bool gamma_correction;
- -            int nlevels;
- -
- -        protected:
- -            // initializes the buffers; only needs to be done once in the case of multiscale detection
- -            void init_buffer(const oclMat &img, Size win_stride);
- -            void computeBlockHistograms(const oclMat &img);
- -            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
- -            double getWinSigma() const;
- -            bool checkDetectorSize() const;
- -
- -            static int numPartsWithin(int size, int part_size, int stride);
- -            static Size numPartsWithin(Size size, Size part_size, Size stride);
- -
- -            // Coefficients of the separating plane
- -            float free_coef;
- -            oclMat detector;
- -            // Results of the last classification step
- -            oclMat labels;
- -            Mat labels_host;
- -            // Results of the last histogram evaluation step
- -            oclMat block_hists;
- -            // Gradient computation results
- -            oclMat grad, qangle;
- -            // scaled image
- -            oclMat image_scale;
- -            // effective size of the input image (may differ from the original size after scaling)
- -            Size effect_size;
- -        };
- -
- -
- -        ////////////////////////feature2d_ocl/////////////////
- -        /****************************************************************************************\
- -        *                                      Distance                                          *
- -        *  Accumulator<T>::Type is the result type used by the L1 and L2 distance functors.      *
- -        *  It defaults to T; the specializations below map unsigned char, unsigned short,        *
- -        *  char and short to float.                                                              *
- -        \****************************************************************************************/
- -        template<typename T>
- -        struct CV_EXPORTS Accumulator
- -        {
- -            typedef T Type;
- -        };
- -        template<> struct Accumulator<unsigned char>
- -        {
- -            typedef float Type;
- -        };
- -        template<> struct Accumulator<unsigned short>
- -        {
- -            typedef float Type;
- -        };
- -        template<> struct Accumulator<char>
- -        {
- -            typedef float Type;
- -        };
- -        template<> struct Accumulator<short>
- -        {
- -            typedef float Type;
- -        };
- -
- -        /*
- -         * Manhattan distance (city block distance) functor.
- -         * ValueType is T and ResultType is Accumulator<T>::Type; normType is NORM_L1.
- -         * operator() forwards the two arrays and their size to normL1.
- -         */
- -        template<class T>
- -        struct CV_EXPORTS L1
- -        {
- -            enum { normType = NORM_L1 };
- -            typedef T ValueType;
- -            typedef typename Accumulator<T>::Type ResultType;
- -
- -            ResultType operator()( const T *a, const T *b, int size ) const
- -            {
- -                return normL1<ValueType, ResultType>(a, b, size);
- -            }
- -        };
- -
- -        /*
- -         * Euclidean distance functor.
- -         * ValueType is T and ResultType is Accumulator<T>::Type; normType is NORM_L2.
- -         * operator() takes the square root (computed in double) of normL2Sqr and
- -         * casts the result back to ResultType.
- -         */
- -        template<class T>
- -        struct CV_EXPORTS L2
- -        {
- -            enum { normType = NORM_L2 };
- -            typedef T ValueType;
- -            typedef typename Accumulator<T>::Type ResultType;
- -
- -            ResultType operator()( const T *a, const T *b, int size ) const
- -            {
- -                return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
- -            }
- -        };
- -
- -        /*
- -         * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor.
- -         * The result is the bit count of a XOR b; ValueType is unsigned char and ResultType is int.
- -         */
- -        struct CV_EXPORTS Hamming
- -        {
- -            enum { normType = NORM_HAMMING };
- -            typedef unsigned char ValueType;
- -            typedef int ResultType;
- -
- -            /** counts the bits set in a ^ b by forwarding to normHamming
- -             */
- -            ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
- -            {
- -                return normHamming(a, b, size);
- -            }
- -        };
- -
- -        ////////////////////////////////// BruteForceMatcher //////////////////////////////////
- -
- -        class CV_EXPORTS BruteForceMatcher_OCL_base
- -        {
- -        public:
- -            enum DistType {L1Dist = 0, L2Dist, HammingDist};
- -            explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
- -            // Add descriptors to train descriptor collection
- -            void add(const std::vector<oclMat> &descCollection);
- -            // Get train descriptors collection
- -            const std::vector<oclMat> &getTrainDescriptors() const;
- -            // Clear train descriptors collection
- -            void clear();
- -            // Return true if there are no train descriptors in the collection
- -            bool empty() const;
- -
- -            // Return true if the matcher supports mask in match methods
- -            bool isMaskSupported() const;
- -
- -            // Find one best match for each query descriptor
- -            void matchSingle(const oclMat &query, const oclMat &train,
- -                             oclMat &trainIdx, oclMat &distance,
- -                             const oclMat &mask = oclMat());
- -
- -            // Download trainIdx and distance and convert it to CPU vector with DMatch
- -            static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
- -            // Convert trainIdx and distance to vector with DMatch
- -            static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
- -
- -            // Find one best match for each query descriptor
- -            void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
- -
- -            // Make gpu collection of trains and masks in suitable format for matchCollection function
- -            void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
- -
- -
- -            // Find one best match from train collection for each query descriptor
- -            void matchCollection(const oclMat &query, const oclMat &trainCollection,
- -                                 oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- -                                 const oclMat &masks = oclMat());
- -
- -            // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
- -            static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
- -            // Convert trainIdx, imgIdx and distance to vector with DMatch
- -            static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
- -
- -            // Find one best match from train collection for each query descriptor.
- -            void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
- -
- -            // Find k best matches for each query descriptor (in increasing order of distances)
- -            void knnMatchSingle(const oclMat &query, const oclMat &train,
- -                                oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
- -                                const oclMat &mask = oclMat());
- -
- -            // Download trainIdx and distance and convert it to vector with DMatch
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
- -                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -
- -            // Convert trainIdx and distance to vector with DMatch
- -            static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
- -                                        std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -
- -            // Find k best matches for each query descriptor (in increasing order of distances).
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            void knnMatch(const oclMat &query, const oclMat &train,
- -                          std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
- -                          bool compactResult = false);
- -
- -            // Find k best matches from train collection for each query descriptor (in increasing order of distances)
- -            void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
- -                                     oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
- -                                     const oclMat &maskCollection = oclMat());
- -
- -            // Download trainIdx and distance and convert it to vector with DMatch
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
- -                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -
- -            // Convert trainIdx and distance to vector with DMatch
- -            static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
- -                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -
- -            // Find k best matches  for each query descriptor (in increasing order of distances).
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
- -                          const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
- -
- -            // Find best matches for each query descriptor which have distance less than maxDistance.
- -            // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
- -            // Careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
- -            // because it didn't have enough memory.
- -            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
- -            // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
- -            // Matches are not sorted.
- -            void radiusMatchSingle(const oclMat &query, const oclMat &train,
- -                                   oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- -                                   const oclMat &mask = oclMat());
- -
- -            // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
- -            // matches will be sorted in increasing order of distances.
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
- -                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -            // Convert trainIdx, nMatches and distance to vector with DMatch.
- -            static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
- -                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -            // Find best matches for each query descriptor which have distance less than maxDistance
- -            // (in increasing order of distances).
- -            void radiusMatch(const oclMat &query, const oclMat &train,
- -                             std::vector< std::vector<DMatch> > &matches, float maxDistance,
- -                             const oclMat &mask = oclMat(), bool compactResult = false);
- -            // Find best matches for each query descriptor which have distance less than maxDistance.
- -            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
- -            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
- -            // Matches doesn't sorted.
- -            void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
- -                                       const std::vector<oclMat> &masks = std::vector<oclMat>());
- -            // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
- -            // matches will be sorted in increasing order of distances.
- -            // compactResult is used when mask is not empty. If compactResult is false matches
- -            // vector will have the same size as queryDescriptors rows. If compactResult is true
- -            // matches vector will not contain matches for fully masked out query descriptors.
- -            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
- -                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -            // Convert trainIdx, nMatches and distance to vector with DMatch.
- -            static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
- -                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
- -            // Find best matches from train collection for each query descriptor which have distance less than
- -            // maxDistance (in increasing order of distances).
- -            void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
- -                             const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
- -            DistType distType;
- -        private:
- -            std::vector<oclMat> trainDescCollection;
- -        };
- -
- -        template <class Distance>
- -        class CV_EXPORTS BruteForceMatcher_OCL;
- -
- -        template <typename T>
- -        class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
- -        {
- -        public:
- -            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
- -            explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
- -        };
- -
- -        template <typename T>
- -        class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
- -        {
- -        public:
- -            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
- -            explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
- -        };
- -
- -        template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
- -        {
- -        public:
- -            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
- -            explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
- -        };
- -
- -        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
- -        {
- -        public:
- -            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
- -        };
- -        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
- -        class CV_EXPORTS PyrLKOpticalFlow
- -        {
- -        public:
- -            PyrLKOpticalFlow()
- -            {
- -                winSize = Size(21, 21);
- -                maxLevel = 3;
- -                iters = 30;
- -                derivLambda = 0.5;
- -                useInitialFlow = false;
- -                minEigThreshold = 1e-4f;
- -                getMinEigenVals = false;
- -                isDeviceArch11_ = false;
- -            }
- -
- -            void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
- -                        oclMat &status, oclMat *err = 0);
- -            void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
- -            Size winSize;
- -            int maxLevel;
- -            int iters;
- -            double derivLambda;
- -            bool useInitialFlow;
- -            float minEigThreshold;
- -            bool getMinEigenVals;
- -            void releaseMemory()
- -            {
- -                dx_calcBuf_.release();
- -                dy_calcBuf_.release();
- -
- -                prevPyr_.clear();
- -                nextPyr_.clear();
- -
- -                dx_buf_.release();
- -                dy_buf_.release();
- -            }
- -        private:
- -            void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
- -            void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
- -
- -            oclMat dx_calcBuf_;
- -            oclMat dy_calcBuf_;
- -
- -            vector<oclMat> prevPyr_;
- -            vector<oclMat> nextPyr_;
- -
- -            oclMat dx_buf_;
- -            oclMat dy_buf_;
- -            oclMat uPyr_[2];
- -            oclMat vPyr_[2];
- -            bool isDeviceArch11_;
- -        };
- -        //////////////// build warping maps ////////////////////
- -        //! builds plane warping maps
- -        CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
- -        //! builds cylindrical warping maps
- -        CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
- -        //! builds spherical warping maps
- -        CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
- -        //! builds Affine warping maps
- -        CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
- -
- -        //! builds Perspective warping maps
- -        CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
- -
- -        ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
- -        //! Interpolate frames (images) using provided optical flow (displacement field).
- -        //! frame0   - frame 0 (32-bit floating point images, single channel)
- -        //! frame1   - frame 1 (the same type and size)
- -        //! fu       - forward horizontal displacement
- -        //! fv       - forward vertical displacement
- -        //! bu       - backward horizontal displacement
- -        //! bv       - backward vertical displacement
- -        //! pos      - new frame position
- -        //! newFrame - new frame
- -        //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
- -        //!            occlusion masks            0, occlusion masks            1,
- -        //!            interpolated forward flow  0, interpolated forward flow  1,
- -        //!            interpolated backward flow 0, interpolated backward flow 1
- -        //!
- -        CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
- -                                          const oclMat &fu, const oclMat &fv,
- -                                          const oclMat &bu, const oclMat &bv,
- -                                          float pos, oclMat &newFrame, oclMat &buf);
- -
- -        //! computes moments of the rasterized shape or a vector of points
- -        CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
- -
- -        class CV_EXPORTS StereoBM_OCL
- -        {
- -        public:
- -            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
- -
- -            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
- -
- -            //! the default constructor
- -            StereoBM_OCL();
- -            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
- -            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
- -
- -            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
- -            //! Output disparity has CV_8U type.
- -            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
- -
- -            //! Some heuristics that tries to estmate
- -            // if current GPU will be faster then CPU in this algorithm.
- -            // It queries current active device.
- -            static bool checkIfGpuCallReasonable();
- -
- -            int preset;
- -            int ndisp;
- -            int winSize;
- -
- -            // If avergeTexThreshold  == 0 => post procesing is disabled
- -            // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
- -            // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
- -            // i.e. input left image is low textured.
- -            float avergeTexThreshold;
- -        private:
- -            oclMat minSSD, leBuf, riBuf;
- -        };
- -
- -        class CV_EXPORTS StereoBeliefPropagation
- -        {
- -        public:
- -            enum { DEFAULT_NDISP  = 64 };
- -            enum { DEFAULT_ITERS  = 5  };
- -            enum { DEFAULT_LEVELS = 5  };
- -            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
- -            explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
- -                                             int iters  = DEFAULT_ITERS,
- -                                             int levels = DEFAULT_LEVELS,
- -                                             int msg_type = CV_16S);
- -            StereoBeliefPropagation(int ndisp, int iters, int levels,
- -                                    float max_data_term, float data_weight,
- -                                    float max_disc_term, float disc_single_jump,
- -                                    int msg_type = CV_32F);
- -            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
- -            void operator()(const oclMat &data, oclMat &disparity);
- -            int ndisp;
- -            int iters;
- -            int levels;
- -            float max_data_term;
- -            float data_weight;
- -            float max_disc_term;
- -            float disc_single_jump;
- -            int msg_type;
- -        private:
- -            oclMat u, d, l, r, u2, d2, l2, r2;
- -            std::vector<oclMat> datas;
- -            oclMat out;
- -        };
- -
- -        class CV_EXPORTS StereoConstantSpaceBP
- -        {
- -        public:
- -            enum { DEFAULT_NDISP    = 128 };
- -            enum { DEFAULT_ITERS    = 8   };
- -            enum { DEFAULT_LEVELS   = 4   };
- -            enum { DEFAULT_NR_PLANE = 4   };
- -            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
- -            explicit StereoConstantSpaceBP(
- -                int ndisp    = DEFAULT_NDISP,
- -                int iters    = DEFAULT_ITERS,
- -                int levels   = DEFAULT_LEVELS,
- -                int nr_plane = DEFAULT_NR_PLANE,
- -                int msg_type = CV_32F);
- -            StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
- -                float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
- -                int min_disp_th = 0,
- -                int msg_type = CV_32F);
- -            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
- -            int ndisp;
- -            int iters;
- -            int levels;
- -            int nr_plane;
- -            float max_data_term;
- -            float data_weight;
- -            float max_disc_term;
- -            float disc_single_jump;
- -            int min_disp_th;
- -            int msg_type;
- -            bool use_local_init_data_cost;
- -        private:
- -            oclMat u[2], d[2], l[2], r[2];
- -            oclMat disp_selected_pyr[2];
- -            oclMat data_cost;
- -            oclMat data_cost_selected;
- -            oclMat temp;
- -            oclMat out;
- -        };
- -
- -        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
- -        //
- -        // see reference:
- -        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
- -        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
- -        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
- -        {
- -        public:
- -            OpticalFlowDual_TVL1_OCL();
- -
- -            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
- -
- -            void collectGarbage();
- -
- -            /**
- -            * Time step of the numerical scheme.
- -            */
- -            double tau;
- -
- -            /**
- -            * Weight parameter for the data term, attachment parameter.
- -            * This is the most relevant parameter, which determines the smoothness of the output.
- -            * The smaller this parameter is, the smoother the solutions we obtain.
- -            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
- -            */
- -            double lambda;
- -
- -            /**
- -            * Weight parameter for (u - v)^2, tightness parameter.
- -            * It serves as a link between the attachment and the regularization terms.
- -            * In theory, it should have a small value in order to maintain both parts in correspondence.
- -            * The method is stable for a large range of values of this parameter.
- -            */
- -            double theta;
- -
- -            /**
- -            * Number of scales used to create the pyramid of images.
- -            */
- -            int nscales;
- -
- -            /**
- -            * Number of warpings per scale.
- -            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
- -            * This is a parameter that assures the stability of the method.
- -            * It also affects the running time, so it is a compromise between speed and accuracy.
- -            */
- -            int warps;
- -
- -            /**
- -            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
- -            * A small value will yield more accurate solutions at the expense of a slower convergence.
- -            */
- -            double epsilon;
- -
- -            /**
- -            * Stopping criterion iterations number used in the numerical scheme.
- -            */
- -            int iterations;
- -
- -            bool useInitialFlow;
- -
- -        private:
- -            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
- -
- -            std::vector<oclMat> I0s;
- -            std::vector<oclMat> I1s;
- -            std::vector<oclMat> u1s;
- -            std::vector<oclMat> u2s;
- -
- -            oclMat I1x_buf;
- -            oclMat I1y_buf;
- -
- -            oclMat I1w_buf;
- -            oclMat I1wx_buf;
- -            oclMat I1wy_buf;
- -
- -            oclMat grad_buf;
- -            oclMat rho_c_buf;
- -
- -            oclMat p11_buf;
- -            oclMat p12_buf;
- -            oclMat p21_buf;
- -            oclMat p22_buf;
- -
- -            oclMat diff_buf;
- -            oclMat norm_buf;
- -        };
- -    }
- -}
- -#if defined _MSC_VER && _MSC_VER >= 1200
- -#  pragma warning( push)
- -#  pragma warning( disable: 4267)
- -#endif
- -#include "opencv2/ocl/matrix_operations.hpp"
- -#if defined _MSC_VER && _MSC_VER >= 1200
- -#  pragma warning( pop)
+ +#ifdef __OPENCV_BUILD
+ +#error this is a compatibility header which should not be used inside the OpenCV library
   #endif
   
- #include "opencv2/ocl.hpp"
- -#endif /* __OPENCV_GPU_HPP__ */
++#include "opencv2/ocl.hpp"
diff --cc modules/ocl/src/arithm.cpp

index f7024ff,ed2515d..d425344
--- 1/modules/ocl/src/arithm.cpp
--- 2/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@@ -126,8 -118,8 +117,8 @@@ inline int divUp(int total, int grain
   /////////////////////// add subtract multiply divide /////////////////////////
   //////////////////////////////////////////////////////////////////////////////
   template<typename T>
--void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, 
- -                    string kernelName, const char **kernelString, void *_scalar, int op_type = 0)
++void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ +                    String kernelName, const char **kernelString, void *_scalar, int op_type = 0)
   {
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
@@@ -195,13 -187,13 +186,13 @@@
               openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
       }
   }
--static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, 
- -                           string kernelName, const char **kernelString, int op_type = 0)
++static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
+ +                           String kernelName, const char **kernelString, int op_type = 0)
   {
       arithmetic_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, op_type);
   }
--static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, 
- -                           string kernelName, const char **kernelString, int op_type = 0)
++static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask,
+ +                           String kernelName, const char **kernelString, int op_type = 0)
   {
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
@@@ -474,11 -466,15 +466,16 @@@ void cv::ocl::subtract(const oclMat &sr
   }
   void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, const oclMat &mask)
   {
- -    string kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
+ +    String kernelName = mask.data ? "arithm_s_add_with_mask" : "arithm_s_add";
       const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
+ +
       arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1);
   }
- -    string kernelName = "arithm_muls";
+ void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst)
+ {
++    String kernelName = "arithm_muls";
+     arithmetic_scalar_run( src, dst, kernelName, &arithm_mul, scalar);
+ }
   void cv::ocl::divide(double scalar, const oclMat &src,  oclMat &dst)
   {
       if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
@@@ -1647,7 -1643,8 +1644,8 @@@ static void bitwise_run(const oclMat &s
   
   
   template<typename T>
- void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const char **kernelString, void *_scalar)
- -void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName,
++void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName,
+  const char **kernelString, void *_scalar, const char* _opt = NULL)
   {
       dst.create(src1.size(), src1.type());
       CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
@@@ -1694,16 -1691,18 +1692,18 @@@
       {
           double scalar1 = *((double *)_scalar);
           T scalar = (T)scalar1;
- -        args.push_back( make_pair( sizeof(T), (void *)&scalar ));
+ +        args.push_back( std::make_pair( sizeof(T), (void *)&scalar ));
       }
   
-     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth, _opt);
   }
- static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String kernelName, const char **kernelString)
+ static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- - string kernelName, const char **kernelString, const char* _opt = NULL)
++ String kernelName, const char **kernelString, const char* _opt = NULL)
   {
-     bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL);
+     bitwise_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL, _opt);
   }
- static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString)
+ static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
- - const oclMat &mask, string kernelName, const char **kernelString, const char* _opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, const char* _opt = NULL)
   {
       dst.create(src1.size(), src1.type());
       CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
@@@ -1734,29 -1733,30 +1734,30 @@@
                                 };
   
       int dst_step1 = dst.cols * dst.elemSize();
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&mask.step ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&mask.offset ));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
   
-     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, _opt);
   }
   
   
   template <typename WT , typename CL_WT>
- void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar)
+ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
- - const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt = NULL)
   {
       dst.create(src1.size(), src1.type());
   
@@@ -1815,17 -1815,19 +1816,19 @@@
       if(isMatSubScalar != 0)
       {
           isMatSubScalar = isMatSubScalar > 0 ? 1 : 0;
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
+ +        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&isMatSubScalar));
       }
   
-     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
+     openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth, opt);
   }
   
   
- typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar);
+ typedef void (*BitwiseFuncS)(const oclMat &src1, const Scalar &src2, oclMat &dst,
- - const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt);
   
   
- static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar)
+ static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
- - const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
++ const oclMat &mask, String kernelName, const char **kernelString, int isMatSubScalar, const char* opt)
   {
       static BitwiseFuncS tab[8] =
       {
@@@ -1852,12 -1854,13 +1855,13 @@@
       };
       BitwiseFuncS func = tab[src1.depth()];
       if(func == 0)
- -        cv::ocl::error("Unsupported arithmetic operation", __FILE__, __LINE__);
+ +        cv::error(Error::StsBadArg, "Unsupported arithmetic operation", "", __FILE__, __LINE__);
-     func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar);
+     func(src1, src2, dst, mask, kernelName, kernelString, isMatSubScalar, opt);
   }
- static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, String kernelName, const char **kernelString)
+ static void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst,
- - const oclMat &mask, string kernelName, const char **kernelString, const char * opt = NULL)
++ const oclMat &mask, String kernelName, const char **kernelString, const char * opt = NULL)
   {
-     bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0);
+     bitwise_scalar(src1, src2, dst, mask, kernelName, kernelString, 0, opt);
   }
   
   void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
@@@ -1877,15 -1880,16 +1881,16 @@@ void cv::ocl::bitwise_or(const oclMat &
       // dst.create(src1.size(),src1.type());
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
- -        cout << "Selected device do not support double" << endl;
+ +        std::cout << "Selected device do not support double" << std::endl;
           return;
       }
-     oclMat emptyMat;
-     String kernelName = mask.empty() ? "arithm_bitwise_or" : "arithm_bitwise_or_with_mask";
+ 
- -    string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++    String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
+     static const char opt [] = "-D OP_BINARY=|";
       if (mask.empty())
-         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_or);
+         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
       else
-         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_or_mask);
+         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
   }
   
   
@@@ -1893,14 -1897,15 +1898,15 @@@ void cv::ocl::bitwise_or(const oclMat &
   {
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
- -        cout << "Selected device do not support double" << endl;
+ +        std::cout << "Selected device do not support double" << std::endl;
           return;
       }
-     String kernelName = mask.data ? "arithm_s_bitwise_or_with_mask" : "arithm_s_bitwise_or";
+     static const char opt [] = "-D OP_BINARY=|";
- -    string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++    String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
       if (mask.data)
-         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar_mask);
+         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
       else
-         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar);
+         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
   }
   
   void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
@@@ -1913,43 -1918,45 +1919,45 @@@
       }
       oclMat emptyMat;
   
-     String kernelName = mask.empty() ? "arithm_bitwise_and" : "arithm_bitwise_and_with_mask";
- -    string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++    String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
   
+     static const char opt [] = "-D OP_BINARY=&";
       if (mask.empty())
-         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_and);
+         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
       else
-         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_mask);
+         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
   }
   
   void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
   {
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
- -        cout << "Selected device do not support double" << endl;
+ +        std::cout << "Selected device do not support double" << std::endl;
           return;
       }
-     String kernelName = mask.data ? "arithm_s_bitwise_and_with_mask" : "arithm_s_bitwise_and";
+     static const char opt [] = "-D OP_BINARY=&";
- -    string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++    String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
       if (mask.data)
-         bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar_mask);
+         bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
       else
-         bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar);
+         bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
   }
   
   void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
   {
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
- -        cout << "Selected device do not support double" << endl;
+ +        std::cout << "Selected device do not support double" << std::endl;
           return;
       }
-     oclMat emptyMat;
-     String kernelName = mask.empty() ? "arithm_bitwise_xor" : "arithm_bitwise_xor_with_mask";
- -    string kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
++    String kernelName = mask.empty() ? "arithm_bitwise_binary" : "arithm_bitwise_binary_with_mask";
   
+     static const char opt [] = "-D OP_BINARY=^";
   
       if (mask.empty())
-         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_xor);
+         bitwise_run(src1, src2, dst, kernelName, &arithm_bitwise_binary, opt);
       else
-         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_mask);
+         bitwise_run(src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_mask, opt);
   }
   
   
@@@ -1958,14 -1965,15 +1966,15 @@@ void cv::ocl::bitwise_xor(const oclMat 
   
       if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
       {
- -        cout << "Selected device do not support double" << endl;
+ +        std::cout << "Selected device do not support double" << std::endl;
           return;
       }
-     String kernelName = mask.data ? "arithm_s_bitwise_xor_with_mask" : "arithm_s_bitwise_xor";
- -    string kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
++    String kernelName = mask.data ? "arithm_s_bitwise_binary_with_mask" : "arithm_s_bitwise_binary";
+     static const char opt [] = "-D OP_BINARY=^";
       if (mask.data)
-         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar_mask);
+         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar_mask, opt);
       else
-         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
+         bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_binary_scalar, opt);
   }
   
   oclMat cv::ocl::operator ~ (const oclMat &src)
diff --cc modules/ocl/src/brute_force_matcher.cpp
Simple merge
diff --cc modules/ocl/src/gemm.cpp
Simple merge
diff --cc modules/ocl/src/haar.cpp

index 5206f96,5afe542..09e1816
--- 1/modules/ocl/src/haar.cpp
--- 2/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@@ -908,7 -899,8 +898,8 @@@ CvSeq *cv::ocl::OclCascadeClassifier::o
   
       if( CV_MAT_CN(gimg.type()) > 1 )
       {
- -        cvtColor( gimg, gtemp, CV_BGR2GRAY );
+         oclMat gtemp;
+ +        cvtColor( gimg, gtemp, COLOR_BGR2GRAY );
           gimg = gtemp;
       }
   
@@@ -926,17 -916,14 +915,14 @@@
           int totalheight = 0;
           int indexy = 0;
           CvSize sz;
-         //t = (double)cvGetTickCount();
- -        vector<CvSize> sizev;
- -        vector<float> scalev;
+ +        std::vector<CvSize> sizev;
+ +        std::vector<float> scalev;
           for(factor = 1.f;; factor *= scaleFactor)
           {
- -            CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
+ +            CvSize winSize( cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) );
               sz.width     = cvRound( gimg.cols / factor ) + 1;
               sz.height    = cvRound( gimg.rows / factor ) + 1;
- -            CvSize sz1     = { sz.width - winSize0.width - 1,      sz.height - winSize0.height - 1 };
+ +            CvSize sz1( sz.width - winSize0.width - 1,      sz.height - winSize0.height - 1 );
   
               if( sz1.width <= 0 || sz1.height <= 0 )
                   break;
@@@ -1086,34 -1026,26 +1025,26 @@@
           pq.s[3] = gcascade->pq3;
           float correction = gcascade->inv_window_area;
   
-         //int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
-         //int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
-         //   openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
-         //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
- 
- -        vector<pair<size_t, const void *> > args;
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&pixelstep ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
- -        args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
- -        args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
- -        args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
+ +        std::vector<std::pair<size_t, const void *> > args;
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&pixelstep ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&p ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq ));
+ +        args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction ));
   
           openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
-         //t = (double)cvGetTickCount() - t;
-         //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
-         //t = (double)cvGetTickCount();
-         //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
+ 
           openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
   
           for(int i = 0; i < outputsz; i++)
@@@ -1246,44 -1167,38 +1164,38 @@@
           int step = gsum.step / 4;
           int startnode = 0;
           int splitstage = 3;
-         int splitnode = stage[0].count + stage[1].count + stage[2].count;
           stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
-         //openCLVerifyCall(status);
-         openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+         openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
           candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
-         //openCLVerifyCall(status);
           scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
-         //openCLVerifyCall(status);
-         openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+         openCLSafeCall(clEnqueueWriteBuffer(qu, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
           pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
-         openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
+         openCLSafeCall(clEnqueueWriteBuffer(qu, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
           correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
-         openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
-         //int argcount = 0;
+         openCLSafeCall(clEnqueueWriteBuffer(qu, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
   
- -        vector<pair<size_t, const void *> > args;
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
- -        args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
- -        args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
+ +        std::vector<std::pair<size_t, const void *> > args;
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
++        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
++        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&step ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
-         args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
+ +        args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
   
- 
           openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
   
-         //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
-         candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
+         candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
   
           for(int i = 0; i < outputsz; i++)
           {
@@@ -1317,6 -1502,263 +1499,263 @@@ void cv::ocl::OclCascadeClassifierBuf::
       else
           rweights.resize(rectList.size(), 0);
   
- -        } 
+     GenResult(faces, rectList, rweights);
+ }
+ 
+ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
+     double scaleFactor, int flags,
+     const int outputsz, const size_t localThreads[],
+     CvSize minSize, CvSize maxSize)
+ {
+     CvHaarClassifierCascade      *cascade = oldCascade;
+ 
+     if( !CV_IS_HAAR_CLASSIFIER(cascade) )
+         CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
+ 
+     if( scaleFactor <= 1 )
+         CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
+ 
+     if( cols < minSize.width || rows < minSize.height )
+         CV_Error(CV_StsError, "Image too small");
+ 
+     int datasize=0;
+     int totalclassifier=0;
+ 
+     if( !cascade->hid_cascade )
+         gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
+ 
+     if( maxSize.height == 0 || maxSize.width == 0 )
+     {
+         maxSize.height = rows;
+         maxSize.width = cols;
+     }
+ 
+     findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
+     if( findBiggestObject )
+         flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
+ 
+     CreateBaseBufs(datasize, totalclassifier, flags, outputsz);
+     CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
+ 
+     m_scaleFactor = scaleFactor;
+     m_rows = rows;
+     m_cols = cols;
+     m_flags = flags;
+     m_minSize = minSize;
+     m_maxSize = maxSize;
+ 
+     initialized = true;
+ }
+ 
+ void cv::ocl::OclCascadeClassifierBuf::CreateBaseBufs(const int datasize, const int totalclassifier,
+                                                       const int flags, const int outputsz)
+ {
+     if (!initialized)
+     {
+         buffers = malloc(sizeof(OclBuffers));
+ 
+         size_t tempSize =
+             sizeof(GpuHidHaarStageClassifier) * ((GpuHidHaarClassifierCascade *)oldCascade->hid_cascade)->count;
+         m_nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - tempSize - sizeof(GpuHidHaarClassifier) * totalclassifier)
+             / sizeof(GpuHidHaarTreeNode);
+ 
+         ((OclBuffers *)buffers)->stagebuffer     = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,  tempSize);
+         ((OclBuffers *)buffers)->nodebuffer      = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,  m_nodenum * sizeof(GpuHidHaarTreeNode));
+     }
+ 
+     if (initialized
+         && ((m_flags & CV_HAAR_SCALE_IMAGE) ^ (flags & CV_HAAR_SCALE_IMAGE)))
+     {
+         openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
+     }
+ 
+     if (flags & CV_HAAR_SCALE_IMAGE)
+     {
+         ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
+                                                         CL_MEM_WRITE_ONLY,
+                                                         4 * sizeof(int) * outputsz);
+     }
+     else
+     {
+         ((OclBuffers *)buffers)->candidatebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(),
+                                                         CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+                                                         4 * sizeof(int) * outputsz);
+     }
+ }
+ 
+ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
+     const int rows, const int cols, const int flags,
+     const double scaleFactor, const size_t localThreads[],
+     CvSize minSize, CvSize maxSize)
+ {
+     if (initialized)
+     {
+         if ((m_flags & CV_HAAR_SCALE_IMAGE) && !(flags & CV_HAAR_SCALE_IMAGE))
+         {
+             gimg1.release();
+             gsum.release();
+             gsqsum.release();
- -        } 
++        }
+         else if (!(m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
+         {
+             openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
+             openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+             openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+         }
+         else if ((m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
+         {
+             if (fabs(m_scaleFactor - scaleFactor) < 1e-6
+                 && (rows == m_rows && cols == m_cols)
+                 && (minSize.width == m_minSize.width)
+                 && (minSize.height == m_minSize.height)
+                 && (maxSize.width == m_maxSize.width)
+                 && (maxSize.height == m_maxSize.height))
+             {
+                 return;
+             }
++        }
+         else
+         {
+             if (fabs(m_scaleFactor - scaleFactor) < 1e-6
+                 && (rows == m_rows && cols == m_cols)
+                 && (minSize.width == m_minSize.width)
+                 && (minSize.height == m_minSize.height)
+                 && (maxSize.width == m_maxSize.width)
+                 && (maxSize.height == m_maxSize.height))
+             {
+                 return;
+             }
+             else
+             {
+                 openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
+                 openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+                 openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+             }
+         }
+     }
+ 
+     int loopcount;
+     int indexy = 0;
+     int totalheight = 0;
+     double factor;
+     Rect roi;
+     CvSize sz;
+     CvSize winSize0 = oldCascade->orig_window_size;
+     detect_piramid_info *scaleinfo;
+     if (flags & CV_HAAR_SCALE_IMAGE)
+     {
+         for(factor = 1.f;; factor *= scaleFactor)
+         {
+             CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
+             sz.width     = cvRound( cols / factor ) + 1;
+             sz.height    = cvRound( rows / factor ) + 1;
+             CvSize sz1     = { sz.width - winSize0.width - 1,      sz.height - winSize0.height - 1 };
+ 
+             if( sz1.width <= 0 || sz1.height <= 0 )
+                 break;
+             if( winSize.width > maxSize.width || winSize.height > maxSize.height )
+                 break;
+             if( winSize.width < minSize.width || winSize.height < minSize.height )
+                 continue;
+ 
+             totalheight += sz.height;
+             sizev.push_back(sz);
+             scalev.push_back(static_cast<float>(factor));
+         }
+ 
+         loopcount = sizev.size();
+         gimg1.create(rows, cols, CV_8UC1);
+         gsum.create(totalheight + 4, cols + 1, CV_32SC1);
+         gsqsum.create(totalheight + 4, cols + 1, CV_32FC1);
+ 
+         scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
+         for( int i = 0; i < loopcount; i++ )
+         {
+             sz = sizev[i];
+             roi = Rect(0, indexy, sz.width, sz.height);
+             int width = sz.width - 1 - oldCascade->orig_window_size.width;
+             int height = sz.height - 1 - oldCascade->orig_window_size.height;
+             int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
+             int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
+ 
+             ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
+             ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
+             ((detect_piramid_info *)scaleinfo)[i].imgoff = gsum(roi).offset >> 2;
+             ((detect_piramid_info *)scaleinfo)[i].factor = scalev[i];
+ 
+             indexy += sz.height;
+         }
+     }
+     else
+     {
+         for(factor = 1;
+             cvRound(factor * winSize0.width) < cols - 10 && cvRound(factor * winSize0.height) < rows - 10;
+             factor *= scaleFactor)
+         {
+             CvSize winSize = { cvRound( winSize0.width * factor ), cvRound( winSize0.height * factor ) };
+             if( winSize.width < minSize.width || winSize.height < minSize.height )
+             {
+                 continue;
+             }
+             sizev.push_back(winSize);
+             scalev.push_back(factor);
+         }
+ 
+         loopcount = scalev.size();
+         if(loopcount == 0)
+         {
+             loopcount = 1;
+             sizev.push_back(minSize);
+             scalev.push_back( min(cvRound(minSize.width / winSize0.width), cvRound(minSize.height / winSize0.height)) );
+         }
+ 
+         ((OclBuffers *)buffers)->pbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
+             sizeof(cl_int4) * loopcount);
+         ((OclBuffers *)buffers)->correctionbuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY,
+             sizeof(cl_float) * loopcount);
+         ((OclBuffers *)buffers)->newnodebuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_WRITE,
+             loopcount * m_nodenum * sizeof(GpuHidHaarTreeNode));
+ 
+         scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
+         for( int i = 0; i < loopcount; i++ )
+         {
+             sz = sizev[i];
+             factor = scalev[i];
+             int ystep = cvRound(std::max(2., factor));
+             int width = (cols - 1 - sz.width  + ystep - 1) / ystep;
+             int height = (rows - 1 - sz.height + ystep - 1) / ystep;
+             int grpnumperline = (width + localThreads[0] - 1) / localThreads[0];
+             int totalgrp = ((height + localThreads[1] - 1) / localThreads[1]) * grpnumperline;
+ 
+             ((detect_piramid_info *)scaleinfo)[i].width_height = (width << 16) | height;
+             ((detect_piramid_info *)scaleinfo)[i].grpnumperline_totalgrp = (grpnumperline << 16) | totalgrp;
+             ((detect_piramid_info *)scaleinfo)[i].imgoff = 0;
+             ((detect_piramid_info *)scaleinfo)[i].factor = factor;
+         }
+     }
+ 
+     if (loopcount != m_loopcount)
+     {
+         if (initialized)
+         {
+             openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
+         }
+         ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
+     }
+ 
+     openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
+         sizeof(detect_piramid_info)*loopcount,
+         scaleinfo, 0, NULL, NULL));
+     free(scaleinfo);
+ 
+     m_loopcount = loopcount;
+ }
+ 
+ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& faces,
+                                                  const std::vector<cv::Rect> &rectList,
+                                                  const std::vector<int> &rweights)
+ {
+     CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
   
       if( findBiggestObject && rectList.size() )
       {
diff --cc modules/ocl/src/imgproc.cpp

index 812a1b2,ee1e92a..d703a61
--- 1/modules/ocl/src/imgproc.cpp
--- 2/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@@ -1029,42 -1026,53 +1027,53 @@@ namespace c
               int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
   
               oclMat t_sum , t_sqsum;
-             t_sum.create(src.cols, src.rows, CV_32SC1);
-             t_sqsum.create(src.cols, src.rows, CV_32FC1);
- 
               int w = src.cols + 1, h = src.rows + 1;
-             sum.create(h, w, CV_32SC1);
+             int depth;
+             if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255
+             {
+                 t_sum.create(src.cols, src.rows, CV_32SC1);
+                 sum.create(h, w, CV_32SC1);
+             }
+             else
+             {
+                  //Use float to prevent overflow
+                 t_sum.create(src.cols, src.rows, CV_32FC1);
+                 sum.create(h, w, CV_32FC1);
- -             }
- -             t_sqsum.create(src.cols, src.rows, CV_32FC1);
- -             sqsum.create(h, w, CV_32FC1);
- -             depth = sum.depth();
- -             int sum_offset = sum.offset / vlen;
- -             int sqsum_offset = sqsum.offset / vlen;
- -
- -             vector<pair<size_t , const void *> > args;
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- -             size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- -             openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
- -             args.clear();
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
- -             size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
- -             openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
++            }
++            t_sqsum.create(src.cols, src.rows, CV_32FC1);
+ +            sqsum.create(h, w, CV_32FC1);
-             int sum_offset = sum.offset / vlen, sqsum_offset = sqsum.offset / vlen;
++            depth = sum.depth();
++            int sum_offset = sum.offset / vlen;
++            int sqsum_offset = sqsum.offset / vlen;
+ +
+ +            std::vector<std::pair<size_t , const void *> > args;
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ +            size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-             openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, -1);
++            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
+ +            args.clear();
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sqsum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum.step));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
+ +            size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-             openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, -1);
-             //std::cout << "tested" << std::endl;
++            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
           }
+ 
           void integral(const oclMat &src, oclMat &sum)
           {
               CV_Assert(src.type() == CV_8UC1);
@@@ -1074,34 -1082,40 +1083,42 @@@
               int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
   
               oclMat t_sum;
-             t_sum.create(src.cols, src.rows, CV_32SC1);
- 
               int w = src.cols + 1, h = src.rows + 1;
-             sum.create(h, w, CV_32SC1);
+             int depth;
+             if(src.cols * src.rows <= 2901 * 2901)
+             {
+                 t_sum.create(src.cols, src.rows, CV_32SC1);
+                 sum.create(h, w, CV_32SC1);
+             }else
+             {
+                  t_sum.create(src.cols, src.rows, CV_32FC1);
+                  sum.create(h, w, CV_32FC1);
- -             }
- -             depth = sum.depth();
- -             int sum_offset = sum.offset / vlen;
- -             vector<pair<size_t , const void *> > args;
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&offset ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
- -             size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
- -             openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
- -             args.clear();
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
- -             args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
- -             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
- -             size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
- -             openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);         
++            }
++            depth = sum.depth();
+ +            int sum_offset = sum.offset / vlen;
+ +
+ +            std::vector<std::pair<size_t , const void *> > args;
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&pre_invalid ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+ +            size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-             openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, -1);
++            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
+ +            args.clear();
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&sum.data ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum.step));
+ +            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sum_offset));
+ +            size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-             openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, -1);
++            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
+ +            //std::cout << "tested" << std::endl;
           }
   
           /////////////////////// corner //////////////////////////////
diff --cc modules/ocl/src/initialization.cpp

index 00dd1c6,799c49c..1a96f78
--- 1/modules/ocl/src/initialization.cpp
--- 2/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@@ -126,8 -129,9 +126,10 @@@ namespace c
           {
               cl_platform_id oclplatform;
               std::vector<cl_device_id> devices;
- -            std::vector<std::string> devName;
- -            std::string clVersion;
+ +            std::vector<String> devName;
+ +            String platName;
++            String clVersion;
+ 
               cl_context oclcontext;
               cl_command_queue clCmdQueue;
               int devnum;
@@@ -260,8 -264,9 +262,8 @@@
   
           int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
           {
-             if( (mem_type == DEVICE_MEM_PM && 
- -            if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
- -                 mem_type == DEVICE_MEM_UHP ||
- -                 mem_type == DEVICE_MEM_CHP )
++            if( (mem_type == DEVICE_MEM_PM &&
+ +                 Context::getContext()->impl->unified_memory == 0) )
                   return -1;
               gDeviceMemRW = rw_type;
               gDeviceMemType = mem_type;
@@@ -298,14 -303,11 +300,15 @@@
               std::vector<cl_platform_id> platforms(numPlatforms);
               openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0));
   
- -            char deviceName[256];
               int devcienums = 0;
+ +
+ +            const static int max_name_length = 256;
+ +            char deviceName[max_name_length];
+ +            char plfmName[max_name_length];
+             char clVersion[256];
               for (unsigned i = 0; i < numPlatforms; ++i)
               {
+ +
                   cl_uint numsdev;
                   cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev);
                   if(status != CL_DEVICE_NOT_FOUND)
@@@ -318,10 -320,9 +321,12 @@@
                       openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0));
   
                       Info ocltmpinfo;
+ +                    openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(plfmName), plfmName, NULL));
+ +                    ocltmpinfo.PlatformName = String(plfmName);
+ +                    ocltmpinfo.impl->platName = String(plfmName);
                       ocltmpinfo.impl->oclplatform = platforms[i];
+                     openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(clVersion), clVersion, NULL));
+                     ocltmpinfo.impl->clVersion = clVersion;
                       for(unsigned j = 0; j < numsdev; ++j)
                       {
                           ocltmpinfo.impl->devices.push_back(devices[j]);
@@@ -424,17 -446,11 +429,17 @@@
           }
   
           void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
-                                  size_t widthInBytes, size_t height, 
- -                               size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
++                                 size_t widthInBytes, size_t height,
+ +                                 DevMemRW rw_type, DevMemType mem_type, void* hptr)
           {
               cl_int status;
- -            *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
- -                                      widthInBytes * height, 0, &status);
+ +            if(hptr && (mem_type==DEVICE_MEM_UHP || mem_type==DEVICE_MEM_CHP))
-                 *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, 
-                                           gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], 
++                *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext,
++                                          gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ +                                          widthInBytes * height, hptr, &status);
+ +            else
+ +                *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
+ +                                          widthInBytes * height, 0, &status);
               openCLVerifyCall(status);
               *pitch = widthInBytes;
           }
@@@ -985,6 -1001,8 +990,8 @@@
                   return impl->double_support == 1;
               case CL_UNIFIED_MEM:
                   return impl->unified_memory == 1;
- -                return impl->clVersion.find("OpenCL 1.2") != string::npos;
+             case CL_VER_1_2:
++                return impl->clVersion.find("OpenCL 1.2") != String::npos;
               default:
                   return false;
               }
diff --cc modules/ocl/src/matrix_operations.cpp

index 4d697a2,268a1fe..25252ea
--- 1/modules/ocl/src/matrix_operations.cpp
--- 2/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@@ -193,11 -218,7 +193,11 @@@ void cv::ocl::oclMat::upload(const Mat 
       }
       else
       {
- -        openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
+ +        // try to use host ptr
+ +        createEx(wholeSize, m.type(), gDeviceMemRW, gDeviceMemType, m.datastart);
+ +        if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP)
-             openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, 
++            openCLMemcpy2D(clCxt, data, step, m.datastart, m.step,
+ +                           wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
       }
   
       rows = m.rows;
@@@ -568,38 -590,34 +568,34 @@@ static void set_to_withoutmask_run(cons
           }
           break;
       default:
- -        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
+ +        CV_Error(Error::StsUnsupportedFormat, "unknown depth");
       }
   #ifdef CL_VERSION_1_2
-     if(dst.offset == 0 && dst.cols == dst.wholecols)
+     //this enables backwards portability to
+     //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
+     if(Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
+         dst.offset == 0 && dst.cols == dst.wholecols)
       {
-         clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
- -        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), 
++        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
+             (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
       }
       else
+ #endif
       {
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
+ +        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
+ +        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
+ +        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
+ +        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
+ +        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
           openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
-                             localThreads, args, -1, -1, compile_option);
+             localThreads, args, -1, -1, compile_option);
       }
- #else
-     args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
-     openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
-                         localThreads, args, -1, -1, compile_option);
- #endif
   }
   
- -static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
+ +static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
   {
       CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
- -    vector<pair<size_t , const void *> > args;
+ +    std::vector<std::pair<size_t , const void *> > args;
       size_t localThreads[3] = {16, 16, 1};
       size_t globalThreads[3];
       globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
@@@ -887,10 -910,9 +883,10 @@@ oclMat cv::ocl::oclMat::reshape(int new
   
   }
   
- void cv::ocl::oclMat::createEx(Size size, int type, 
- -void cv::ocl::oclMat::createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type)
++void cv::ocl::oclMat::createEx(Size size, int type,
+ +                               DevMemRW rw_type, DevMemType mem_type, void* hptr)
   {
- -    createEx(size.height, size.width, type, rw_type, mem_type);
+ +    createEx(size.height, size.width, type, rw_type, mem_type, hptr);
   }
   
   void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
@@@ -898,8 -920,7 +894,8 @@@
       createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
   }
   
- void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, 
- -void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type, DevMemType mem_type)
++void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
+ +                               DevMemRW rw_type, DevMemType mem_type, void* hptr)
   {
       clCxt = Context::getContext();
       /* core logic */
@@@ -919,8 -945,8 +915,8 @@@
           size_t esz = elemSize();
   
           void *dev_ptr;
-         openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), 
- -        openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
- -        //openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows);
++        openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
+ +                            rows, rw_type, mem_type, hptr);
   
           if (esz * cols == step)
               flags |= Mat::CONTINUOUS_FLAG;
diff --cc modules/ocl/src/mcwutil.cpp

index 118df52,3bcb870..0bdaf0d
--- 1/modules/ocl/src/mcwutil.cpp
--- 2/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@@ -160,30 -159,44 +159,44 @@@ namespace c
                   CV_Error(-1, "Image forma is not supported");
                   break;
               }
- #if CL_VERSION_1_2
-             cl_image_desc desc;
-             desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
-             desc.image_width      = mat.cols;
-             desc.image_height     = mat.rows;
-             desc.image_depth      = 0;
-             desc.image_array_size = 1;
-             desc.image_row_pitch  = 0;
-             desc.image_slice_pitch = 0;
-             desc.buffer           = NULL;
-             desc.num_mip_levels   = 0;
-             desc.num_samples      = 0;
-             texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
- #else
-             texture = clCreateImage2D(
-                 (cl_context)mat.clCxt->oclContext(),
-                 CL_MEM_READ_WRITE,
-                 &format,
-                 mat.cols,
-                 mat.rows,
-                 0,
-                 NULL,
-                 &err);
+ #ifdef CL_VERSION_1_2
+             //this enables backwards portability to
+             //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
+             if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
+             {
+                 cl_image_desc desc;
+                 desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
+                 desc.image_width      = mat.cols;
+                 desc.image_height     = mat.rows;
+                 desc.image_depth      = 0;
+                 desc.image_array_size = 1;
+                 desc.image_row_pitch  = 0;
+                 desc.image_slice_pitch = 0;
+                 desc.buffer           = NULL;
+                 desc.num_mip_levels   = 0;
+                 desc.num_samples      = 0;
- -                texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);            
++                texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+             }
+             else
+ #endif
+             {
+ #ifdef __GNUC__
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ #endif
+                 texture = clCreateImage2D(
+                     (cl_context)mat.clCxt->oclContext(),
+                     CL_MEM_READ_WRITE,
+                     &format,
+                     mat.cols,
+                     mat.rows,
+                     0,
+                     NULL,
+                     &err);
+ #ifdef __GNUC__
+ #pragma GCC diagnostic pop
   #endif
+             }
               size_t origin[] = { 0, 0, 0 };
               size_t region[] = { mat.cols, mat.rows, 1 };
   
@@@ -195,8 -208,8 +208,8 @@@
                   const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
                   clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
                       regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
- -                clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); 
+ +                clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
-            }
+             }
               else
               {
                   devData = (cl_mem)mat.data;
diff --cc modules/ocl/src/moments.cpp
Simple merge
diff --cc modules/ocl/src/opencl/arithm_bitwise_binary.cl
Simple merge
diff --cc modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
Simple merge
diff --cc modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
Simple merge
diff --cc modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
Simple merge
diff --cc modules/ocl/src/opencl/haarobjectdetect.cl
Simple merge
diff --cc modules/ocl/src/opencl/moments.cl
Simple merge
diff --cc modules/ocl/src/opencl/stereobp.cl
Simple merge
diff --cc modules/ocl/src/stereo_csbp.cpp

index 0000000,44662ea..f124488

mode 000000,100644..100644
--- /dev/null
--- 2/modules/ocl/src/stereo_csbp.cpp
+++ b/modules/ocl/src/stereo_csbp.cpp
@@@ -1,0 -1,763 +1,756 @@@
- -        ///////////////////////////OpenCL kernel strings///////////////////////////
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ //    Jia Haipeng, jiahaipeng95@gmail.com
+ //    Jin Ma, jin@multicorewareinc.com
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other oclMaterials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+ 
+ #include "precomp.hpp"
+ 
+ using namespace cv;
+ using namespace cv::ocl;
+ using namespace std;
+ 
+ #if !defined (HAVE_OPENCL)
+ 
+ namespace cv
+ {
+     namespace ocl
+     {
+ 
+         void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int &, int &, int &, int &)
+         {
+             throw_nogpu();
+         }
+         cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int)
+         {
+             throw_nogpu();
+         }
+         cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float,
+             float, float, int, int)
+         {
+             throw_nogpu();
+         }
+ 
+         void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &, const oclMat &, oclMat &)
+         {
+             throw_nogpu();
+         }
+     }
+ }
+ 
+ #else /* !defined (HAVE_OPENCL) */
+ 
+ namespace cv
+ {
+     namespace ocl
+     {
+ 
- -            static string get_kernel_name(string kernel_name, int data_type)
++        ///////////////////////////OpenCL kernel Strings///////////////////////////
+         extern const char *stereocsbp;
+     }
+ 
+ }
+ namespace cv
+ {
+     namespace ocl
+     {
+         namespace stereoCSBP
+         {
+             //////////////////////////////////////////////////////////////////////////
+             //////////////////////////////common////////////////////////////////////
+             ////////////////////////////////////////////////////////////////////////
+             static inline int divUp(int total, int grain)
+             {
+                 return (total + grain - 1) / grain;
+             }
- -                stringstream idxStr;
- -                if(data_type == CV_16S)
- -                    idxStr << "0";
- -                else
- -                    idxStr << "1";
- -                kernel_name += idxStr.str();
- -
- -                return kernel_name;
++            static String get_kernel_name(String kernel_name, int data_type)
+             {
- -                string kernelName = get_kernel_name("init_data_cost_", data_type);
++                return kernel_name + (data_type == CV_16S ? "0" : "1");
+             }
+             using cv::ocl::StereoConstantSpaceBP;
+             //////////////////////////////////////////////////////////////////////////////////
+             /////////////////////////////////init_data_cost//////////////////////////////////
+             //////////////////////////////////////////////////////////////////////////////////
+             static void init_data_cost_caller(const oclMat &left, const oclMat &right, oclMat &temp,
+                 StereoConstantSpaceBP &rthis,
+                 int msg_step, int h, int w, int level)
+             {
+                 Context  *clCxt = left.clCxt;
+                 int data_type = rthis.msg_type;
+                 int channels = left.oclchannels();
+ 
- -                string kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
++                String kernelName = get_kernel_name("init_data_cost_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8 ,1};
+                 size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int cdisp_step1 = msg_step * h;
+                 openCLVerifyKernel(clCxt, kernel,  localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem),  (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem),  (void *)&left.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem),  (void *)&right.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int),  (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int),  (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int),  (void *)&level));
+                 openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int),  (void *)&channels));
+                 openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int),  (void *)&msg_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&rthis.data_weight));
+                 openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_float), (void *)&rthis.max_data_term));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&cdisp_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+ 
+             static void init_data_cost_reduce_caller(const oclMat &left, const oclMat &right, oclMat &temp,
+                 StereoConstantSpaceBP &rthis,
+                 int msg_step, int h, int w, int level)
+             {
+ 
+                 Context  *clCxt = left.clCxt;
+                 int data_type = rthis.msg_type;
+                 int channels = left.oclchannels();
+                 int win_size = (int)std::pow(2.f, level);
+ 
- -                string kernelName = get_kernel_name("get_first_k_initial_local_", data_type);
++                String kernelName = get_kernel_name("init_data_cost_reduce_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 const int threadsNum = 256;
+                 //size_t blockSize = threadsNum;
+                 size_t localThreads[3]  = {win_size, 1, threadsNum / win_size};
+                 size_t globalThreads[3] = {w *localThreads[0],
+                     h * divUp(rthis.ndisp, localThreads[2]) *localThreads[1], 1 * localThreads[2]
+                 };
+ 
+                 int local_mem_size = threadsNum * sizeof(float);
+                 int cdisp_step1 = msg_step * h;
+ 
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+ 
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&left.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&right.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  local_mem_size,  (void *)NULL));
+                 openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_int),  (void *)&level));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&left.rows));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&left.cols));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&win_size));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&channels));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&rthis.ndisp));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&left.step));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_float), (void *)&rthis.data_weight));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.max_data_term));
+                 openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int),  (void *)&rthis.min_disp_th));
+                 openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&cdisp_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int),  (void *)&msg_step));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+ 
+             static void get_first_initial_local_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
+                 oclMat &temp, StereoConstantSpaceBP &rthis,
+                 int h, int w, int nr_plane, int msg_step)
+             {
+                 Context  *clCxt = temp.clCxt;
+                 int data_type = rthis.msg_type;
+ 
- -                string kernelName = get_kernel_name("get_first_k_initial_global_", data_type);
++                String kernelName = get_kernel_name("get_first_k_initial_local_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8 ,1};
+                 size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int disp_step = msg_step * h;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
+                 openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
+                 oclMat &temp, StereoConstantSpaceBP &rthis,
+                 int h, int w, int nr_plane, int msg_step)
+             {
+                 Context  *clCxt = temp.clCxt;
+                 int data_type = rthis.msg_type;
+ 
- -                string kernelName = get_kernel_name("compute_data_cost_", data_type);
++                String kernelName = get_kernel_name("get_first_k_initial_global_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8, 1};
+                 size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int disp_step = msg_step * h;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_cost_selected));
+                 openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+ 
+             static void init_data_cost(const oclMat &left, const oclMat &right, oclMat &temp, StereoConstantSpaceBP &rthis,
+                 uchar *disp_selected_pyr, uchar *data_cost_selected,
+                 size_t msg_step, int h, int w, int level, int nr_plane)
+             {
+ 
+                 if(level <= 1)
+                     init_data_cost_caller(left, right, temp, rthis, msg_step, h, w, level);
+                 else
+                     init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level);
+ 
+                 if(rthis.use_local_init_data_cost == true)
+                 {
+                     get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step);
+                 }
+                 else
+                 {
+                     get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w,
+                         nr_plane, msg_step);
+                 }
+             }
+ 
+             ///////////////////////////////////////////////////////////////////////////////////////////////////
+             ///////////////////////////////////compute_data_cost//////////////////////////////////////////////
+             ////////////////////////////////////////////////////////////////////////////////////////////////
+             static void compute_data_cost_caller(uchar *disp_selected_pyr, uchar *data_cost,
+                 StereoConstantSpaceBP &rthis, int msg_step1,
+                 int msg_step2, const oclMat &left, const oclMat &right, int h,
+                 int w, int h2, int level, int nr_plane)
+             {
+                 Context  *clCxt = left.clCxt;
+                 int channels = left.oclchannels();
+                 int data_type = rthis.msg_type;
+ 
- -                string kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
++                String kernelName = get_kernel_name("compute_data_cost_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8, 1};
+                 size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int disp_step1 = msg_step1 * h;
+                 int disp_step2 = msg_step2 * h2;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&data_cost));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&left.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&right.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_int),  (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&level));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&channels));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&msg_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&msg_step2));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&disp_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&disp_step2));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_float), (void *)&rthis.data_weight));
+                 openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
+                 openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&left.step));
+                 openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int),  (void *)&rthis.min_disp_th));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
+                 StereoConstantSpaceBP &rthis, int msg_step1,
+                 int msg_step2, const oclMat &left, const oclMat &right, int h,
+                 int w, int h2, int level, int nr_plane)
+             {
+                 Context  *clCxt = left.clCxt;
+                 int data_type = rthis.msg_type;
+                 int channels = left.oclchannels();
+                 int win_size = (int)std::pow(2.f, level);
+ 
- -                string kernelName = get_kernel_name("init_message_", data_type);
++                String kernelName = get_kernel_name("compute_data_cost_reduce_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 const size_t threadsNum = 256;
+                 //size_t blockSize = threadsNum;
+                 size_t localThreads[3]  = {win_size, 1, threadsNum / win_size};
+                 size_t globalThreads[3] = {w *localThreads[0],
+                     h * divUp(nr_plane, localThreads[2]) *localThreads[1], 1 * localThreads[2]
+                 };
+ 
+                 int disp_step1 = msg_step1 * h;
+                 int disp_step2 = msg_step2 * h2;
+                 size_t local_mem_size = threadsNum * sizeof(float);
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&data_cost));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&left.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&right.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 4, local_mem_size,   (void *)NULL));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_int),  (void *)&level));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_int),  (void *)&left.rows));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&left.cols));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&channels));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int),  (void *)&win_size));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&msg_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int),  (void *)&msg_step2));
+                 openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int),  (void *)&disp_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int),  (void *)&disp_step2));
+                 openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_float), (void *)&rthis.data_weight));
+                 openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
+                 openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int),  (void *)&left.step));
+                 openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int),  (void *)&rthis.min_disp_th));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
+                 int msg_step1, int msg_step2, const oclMat &left, const oclMat &right, int h, int w,
+                 int h2, int level, int nr_plane)
+             {
+                 if(level <= 1)
+                     compute_data_cost_caller(disp_selected_pyr, data_cost, rthis, msg_step1, msg_step2,
+                     left, right, h, w, h2, level, nr_plane);
+                 else
+                     compute_data_cost_reduce_caller(disp_selected_pyr, data_cost, rthis,  msg_step1, msg_step2,
+                     left, right, h, w, h2, level, nr_plane);
+             }
+             ////////////////////////////////////////////////////////////////////////////////////////////////
+             //////////////////////////////////////init message//////////////////////////////////////////////
+             ////////////////////////////////////////////////////////////////////////////////////////////////
+             static void init_message(uchar *u_new, uchar *d_new, uchar *l_new, uchar *r_new,
+                 uchar *u_cur, uchar *d_cur, uchar *l_cur, uchar *r_cur,
+                 uchar *disp_selected_pyr_new, uchar *disp_selected_pyr_cur,
+                 uchar *data_cost_selected, uchar *data_cost, oclMat &temp, StereoConstantSpaceBP rthis,
+                 size_t msg_step1, size_t msg_step2, int h, int w, int nr_plane,
+                 int h2, int w2, int nr_plane2)
+             {
+                 Context  *clCxt = temp.clCxt;
+                 int data_type = rthis.msg_type;
+ 
- -                string kernelName = get_kernel_name("compute_message_", data_type);
++                String kernelName = get_kernel_name("init_message_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8, 1};
+                 size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int disp_step1 = msg_step1 * h;
+                 int disp_step2 = msg_step2 * h2;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem), (void *)&u_new));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem), (void *)&d_new));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem), (void *)&l_new));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem), (void *)&r_new));
+                 openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem), (void *)&u_cur));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem), (void *)&d_cur));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem), (void *)&l_cur));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_mem), (void *)&r_cur));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_mem), (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_mem), (void *)&disp_selected_pyr_new));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_mem), (void *)&disp_selected_pyr_cur));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_mem), (void *)&data_cost_selected));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_mem), (void *)&data_cost));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&h2));
+                 openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_int), (void *)&w2));
+                 openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&nr_plane2));
+                 openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&disp_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
+                 openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
+                 openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             ////////////////////////////////////////////////////////////////////////////////////////////////
+             ///////////////////////////calc_all_iterations////////////////////////////////////////////////
+             //////////////////////////////////////////////////////////////////////////////////////////////
+             static void calc_all_iterations_caller(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
+                 uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
+                 int msg_step, int h, int w, int nr_plane, int i)
+             {
+                 Context  *clCxt = temp.clCxt;
+                 int data_type = rthis.msg_type;
+ 
- -                string kernelName = get_kernel_name("compute_disp_", data_type);
++                String kernelName = get_kernel_name("compute_message_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+                 size_t localThreads[]  = {32, 8, 1};
+                 size_t globalThreads[] = {divUp(w, (localThreads[0]) << 1) *localThreads[0],
+                     divUp(h, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int disp_step = msg_step * h;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem),  (void *)&u));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem),  (void *)&d));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem),  (void *)&l));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem),  (void *)&r));
+                 openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem),  (void *)&data_cost_selected));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem),  (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem),  (void *)&temp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int),  (void *)&h));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int),  (void *)&w));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int),  (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int),  (void *)&i));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_float), (void *)&rthis.max_disc_term));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int),  (void *)&disp_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int),  (void *)&msg_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+             static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
+                 uchar *disp_selected_pyr, oclMat &temp, StereoConstantSpaceBP rthis,
+                 int msg_step, int h, int w, int nr_plane)
+             {
+                 for(int t = 0; t < rthis.iters; t++)
+                     calc_all_iterations_caller(u, d, l, r, data_cost_selected, disp_selected_pyr, temp, rthis,
+                     msg_step, h, w, nr_plane, t & 1);
+             }
+ 
+             ///////////////////////////////////////////////////////////////////////////////////////////////
+             //////////////////////////compute_disp////////////////////////////////////////////////////////
+             /////////////////////////////////////////////////////////////////////////////////////////////
+             static void compute_disp(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
+                 uchar *disp_selected_pyr, StereoConstantSpaceBP &rthis, size_t msg_step,
+                 oclMat &disp, int nr_plane)
+             {
+                 Context  *clCxt = disp.clCxt;
+                 int data_type = rthis.msg_type;
+ 
- -    int rows = left.rows;  
- -    int cols = left.cols; 
++                String kernelName = get_kernel_name("compute_disp_", data_type);
+ 
+                 cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
+ 
+                 //size_t blockSize = 256;
+                 size_t localThreads[]  = {32, 8, 1};
+                 size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) *localThreads[0],
+                     divUp(disp.rows, localThreads[1]) *localThreads[1],
+                     1
+                 };
+ 
+                 int step_size = disp.step / disp.elemSize();
+                 int disp_step = disp.rows * msg_step;
+                 openCLVerifyKernel(clCxt, kernel, localThreads);
+                 openCLSafeCall(clSetKernelArg(kernel, 0,  sizeof(cl_mem), (void *)&u));
+                 openCLSafeCall(clSetKernelArg(kernel, 1,  sizeof(cl_mem), (void *)&d));
+                 openCLSafeCall(clSetKernelArg(kernel, 2,  sizeof(cl_mem), (void *)&l));
+                 openCLSafeCall(clSetKernelArg(kernel, 3,  sizeof(cl_mem), (void *)&r));
+                 openCLSafeCall(clSetKernelArg(kernel, 4,  sizeof(cl_mem), (void *)&data_cost_selected));
+                 openCLSafeCall(clSetKernelArg(kernel, 5,  sizeof(cl_mem), (void *)&disp_selected_pyr));
+                 openCLSafeCall(clSetKernelArg(kernel, 6,  sizeof(cl_mem), (void *)&disp.data));
+                 openCLSafeCall(clSetKernelArg(kernel, 7,  sizeof(cl_int), (void *)&step_size));
+                 openCLSafeCall(clSetKernelArg(kernel, 8,  sizeof(cl_int), (void *)&disp.cols));
+                 openCLSafeCall(clSetKernelArg(kernel, 9,  sizeof(cl_int), (void *)&disp.rows));
+                 openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
+                 openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step));
+                 openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
+                 openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
+                     globalThreads, localThreads, 0, NULL, NULL));
+ 
+                 clFinish(*(cl_command_queue*)getoclCommandQueue());
+                 openCLSafeCall(clReleaseKernel(kernel));
+             }
+         }
+     }
+ }
+ namespace
+ {
+     const float DEFAULT_MAX_DATA_TERM = 30.0f;
+     const float DEFAULT_DATA_WEIGHT = 1.0f;
+     const float DEFAULT_MAX_DISC_TERM = 160.0f;
+     const float DEFAULT_DISC_SINGLE_JUMP = 10.0f;
+ }
+ 
+ void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane)
+ {
+     ndisp = (int) ((float) width / 3.14f);
+     if ((ndisp & 1) != 0)
+         ndisp++;
+ 
+     int mm = ::max(width, height);
+     iters = mm / 100 + ((mm > 1200) ? - 4 : 4);
+ 
+     levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
+     if (levels == 0) levels++;
+ 
+     nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
+ }
+ 
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
+     int msg_type_)
+ 
+     : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
+     max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
+     max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0),
+     msg_type(msg_type_), use_local_init_data_cost(true)
+ {
+     CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
+ }
+ 
+ 
+ cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
+     float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
+     int min_disp_th_, int msg_type_)
+     : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
+     max_data_term(max_data_term_), data_weight(data_weight_),
+     max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
+     msg_type(msg_type_), use_local_init_data_cost(true)
+ {
+     CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
+ }
+ 
+ template<class T>
+ static void csbp_operator(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
+     oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
+     oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp)
+ {
+     CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
+         && left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
+ 
+     CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3));
+ 
+     const Scalar zero = Scalar::all(0);
+ 
+     ////////////////////////////////////Init///////////////////////////////////////////////////
- -    int levels = rthis.levels; 
++    int rows = left.rows;
++    int cols = left.cols;
+ 
+     rthis.levels = min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
- -    nr_plane_pyr[0] = rthis.nr_plane; 
++    int levels = rthis.levels;
+ 
+     AutoBuffer<int> buf(levels * 4);
+ 
+     int *cols_pyr = buf;
+     int *rows_pyr = cols_pyr + levels;
+     int *nr_plane_pyr = rows_pyr + levels;
+     int *step_pyr = nr_plane_pyr + levels;
+ 
+     cols_pyr[0] = cols;
+     rows_pyr[0] = rows;
- -    step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T); 
++    nr_plane_pyr[0] = rthis.nr_plane;
+ 
+     const int n = 64;
- -    disp_selected_pyr[0] = zero; 
++    step_pyr[0] = alignSize(cols * sizeof(T), n) / sizeof(T);
+     for (int i = 1; i < levels; i++)
+     {
+         cols_pyr[i] = cols_pyr[i - 1]  / 2;
+         rows_pyr[i] = rows_pyr[i - 1]/ 2;
+ 
+         nr_plane_pyr[i] = nr_plane_pyr[i - 1] * 2;
+ 
+         step_pyr[i] = alignSize(cols_pyr[i] * sizeof(T), n) / sizeof(T);
+     }
+ 
+     Size msg_size(step_pyr[0], rows * nr_plane_pyr[0]);
+     Size data_cost_size(step_pyr[0], rows * nr_plane_pyr[0] * 2);
+ 
+     u[0].create(msg_size, DataType<T>::type);
+     d[0].create(msg_size, DataType<T>::type);
+     l[0].create(msg_size, DataType<T>::type);
+     r[0].create(msg_size, DataType<T>::type);
+ 
+     u[1].create(msg_size, DataType<T>::type);
+     d[1].create(msg_size, DataType<T>::type);
+     l[1].create(msg_size, DataType<T>::type);
+     r[1].create(msg_size, DataType<T>::type);
+ 
+     disp_selected_pyr[0].create(msg_size, DataType<T>::type);
+     disp_selected_pyr[1].create(msg_size, DataType<T>::type);
+ 
+     data_cost.create(data_cost_size, DataType<T>::type);
+     data_cost_selected.create(msg_size, DataType<T>::type);
+ 
+     Size temp_size = data_cost_size;
+     if (data_cost_size.width * data_cost_size.height < step_pyr[0] * rows_pyr[levels - 1] * rthis.ndisp)
+         temp_size = Size(step_pyr[0], rows_pyr[levels - 1] * rthis.ndisp);
+ 
+     temp.create(temp_size, DataType<T>::type);
+     temp = zero;
+ 
+     ///////////////////////////////// Compute////////////////////////////////////////////////
+ 
+     //csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
+     //   rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
+ 
+     l[0] = zero;
+     d[0] = zero;
+     r[0] = zero;
+     u[0] = zero;
++    disp_selected_pyr[0] = zero;
+ 
+     l[1] = zero;
+     d[1] = zero;
+     r[1] = zero;
+     u[1] = zero;
+     disp_selected_pyr[1] = zero;
+ 
+     data_cost = zero;
+ 
+     data_cost_selected = zero;
+ 
+     int cur_idx = 0;
+ 
+     for (int i = levels - 1; i >= 0; i--)
+     {
+         if (i == levels - 1)
+         {
+             cv::ocl::stereoCSBP::init_data_cost(left, right, temp, rthis, disp_selected_pyr[cur_idx].data,
+                 data_cost_selected.data, step_pyr[0], rows_pyr[i], cols_pyr[i],
+                 i, nr_plane_pyr[i]);
+         }
+         else
+         {
+             cv::ocl::stereoCSBP::compute_data_cost(
+                 disp_selected_pyr[cur_idx].data, data_cost.data, rthis, step_pyr[0],
+                 step_pyr[0], left, right, rows_pyr[i], cols_pyr[i], rows_pyr[i + 1], i,
+                 nr_plane_pyr[i + 1]);
+ 
+             int new_idx = (cur_idx + 1) & 1;
+ 
+             cv::ocl::stereoCSBP::init_message(u[new_idx].data, d[new_idx].data, l[new_idx].data, r[new_idx].data,
+                 u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+                 disp_selected_pyr[new_idx].data, disp_selected_pyr[cur_idx].data,
+                 data_cost_selected.data, data_cost.data, temp, rthis, step_pyr[0],
+                 step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rows_pyr[i + 1],
+                 cols_pyr[i + 1], nr_plane_pyr[i + 1]);
+             cur_idx = new_idx;
+         }
+         cv::ocl::stereoCSBP::calc_all_iterations(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+             data_cost_selected.data, disp_selected_pyr[cur_idx].data, temp,
+             rthis, step_pyr[0], rows_pyr[i], cols_pyr[i], nr_plane_pyr[i]);
+     }
+ 
+     if (disp.empty())
+         disp.create(rows, cols, CV_16S);
+ 
+     out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
+     out = zero;
+ 
+     stereoCSBP::compute_disp(u[cur_idx].data, d[cur_idx].data, l[cur_idx].data, r[cur_idx].data,
+         data_cost_selected.data, disp_selected_pyr[cur_idx].data, rthis, step_pyr[0],
+         out, nr_plane_pyr[0]);
+     if (disp.type() != CV_16S)
+         out.convertTo(disp, disp.type());
+ }
+ 
+ 
+ typedef void (*csbp_operator_t)(StereoConstantSpaceBP &rthis, oclMat u[2], oclMat d[2], oclMat l[2], oclMat r[2],
+     oclMat disp_selected_pyr[2], oclMat &data_cost, oclMat &data_cost_selected,
+     oclMat &temp, oclMat &out, const oclMat &left, const oclMat &right, oclMat &disp);
+ 
+ const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
+ 
+ void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat &right, oclMat &disp)
+ {
+ 
+     CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
+     operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out,
+         left, right, disp);
+ }
+ 
+ #endif /* !defined (HAVE_OPENCL) */
diff --cc modules/ocl/src/stereobm.cpp

index 5eab75a,e620c2a..a5cbe2b
--- 1/modules/ocl/src/stereobm.cpp
--- 2/modules/ocl/src/stereobm.cpp
+++ b/modules/ocl/src/stereobm.cpp
@@@ -72,10 -74,7 +72,7 @@@ namespace stereoB
   ////////////////////////////////////////////////////////////////////////
   static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
   {
-     Context *clCxt = input.clCxt;
- 
- -    string kernelName = "prefilter_xsobel";
+ +    String kernelName = "prefilter_xsobel";
-     cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
   
       size_t blockSize = 1;
       size_t globalThreads[3] = { input.cols, input.rows, 1 };
@@@ -113,10 -108,7 +106,7 @@@ static void stereo_bm(const oclMat &lef
   {
       int winsz2 = winSize >> 1;
   
-     Context *clCxt = left.clCxt;
- 
- -    string kernelName = "stereoKernel";
+ +    String kernelName = "stereoKernel";
-     cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
   
       disp.setTo(Scalar_<unsigned char>::all(0));
       minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
@@@ -158,10 -147,7 +145,7 @@@
   static void postfilter_textureness(oclMat &left, int winSize,
                               float avergeTexThreshold, oclMat &disparity)
   {
-     Context *clCxt = left.clCxt;
- 
- -    string kernelName = "textureness_kernel";
+ +    String kernelName = "textureness_kernel";
-     cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
   
       size_t blockSize = 1;
       size_t localThreads[]  = { BLOCK_W, blockSize ,1};
diff --cc modules/ocl/src/tvl1flow.cpp

index 0000000,8182f41..8362672

mode 000000,100644..100644
--- /dev/null
--- 2/modules/ocl/src/tvl1flow.cpp
+++ b/modules/ocl/src/tvl1flow.cpp
@@@ -1,0 -1,475 +1,475 @@@
- -    void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, 
- -        oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, 
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ //            Jin Ma, jin@multicorewareinc.com
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other materials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+ 
+ 
+ #include "precomp.hpp"
+ using namespace std;
+ using namespace cv;
+ using namespace cv::ocl;
+ 
+ namespace cv
+ {
+     namespace ocl
+     {
+         ///////////////////////////OpenCL kernel strings///////////////////////////
+         extern const char* tvl1flow;
+     }
+ }
+ 
+ cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL()
+ {
+     tau            = 0.25;
+     lambda         = 0.15;
+     theta          = 0.3;
+     nscales        = 5;
+     warps          = 5;
+     epsilon        = 0.01;
+     iterations     = 300;
+     useInitialFlow = false;
+ }
+ 
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy)
+ {
+     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
+     CV_Assert( I0.size() == I1.size() );
+     CV_Assert( I0.type() == I1.type() );
+     CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
+     CV_Assert( nscales > 0 );
+ 
+     // allocate memory for the pyramid structure
+     I0s.resize(nscales);
+     I1s.resize(nscales);
+     u1s.resize(nscales);
+     u2s.resize(nscales);
+     //I0s_step == I1s_step
+     I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
+     I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
+ 
+ 
+     if (!useInitialFlow)
+     {
+         flowx.create(I0.size(), CV_32FC1);
+         flowy.create(I0.size(), CV_32FC1);
+     }
+     //u1s_step != u2s_step
+     u1s[0] = flowx;
+     u2s[0] = flowy;
+ 
+     I1x_buf.create(I0.size(), CV_32FC1);
+     I1y_buf.create(I0.size(), CV_32FC1);
+ 
+     I1w_buf.create(I0.size(), CV_32FC1);
+     I1wx_buf.create(I0.size(), CV_32FC1);
+     I1wy_buf.create(I0.size(), CV_32FC1);
+ 
+     grad_buf.create(I0.size(), CV_32FC1);
+     rho_c_buf.create(I0.size(), CV_32FC1);
+ 
+     p11_buf.create(I0.size(), CV_32FC1);
+     p12_buf.create(I0.size(), CV_32FC1);
+     p21_buf.create(I0.size(), CV_32FC1);
+     p22_buf.create(I0.size(), CV_32FC1);
+ 
+     diff_buf.create(I0.size(), CV_32FC1);
+ 
+     // create the scales
+     for (int s = 1; s < nscales; ++s)
+     {
+         ocl::pyrDown(I0s[s - 1], I0s[s]);
+         ocl::pyrDown(I1s[s - 1], I1s[s]);
+ 
+         if (I0s[s].cols < 16 || I0s[s].rows < 16)
+         {
+             nscales = s;
+             break;
+         }
+ 
+         if (useInitialFlow)
+         {
+             ocl::pyrDown(u1s[s - 1], u1s[s]);
+             ocl::pyrDown(u2s[s - 1], u2s[s]);
+ 
+             //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]);
+             multiply(0.5, u1s[s], u1s[s]);
+             //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]);
+             multiply(0.5, u1s[s], u2s[s]);
+         }
+     }
+ 
+     // pyramidal structure for computing the optical flow
+     for (int s = nscales - 1; s >= 0; --s)
+     {
+         // compute the optical flow at the current scale
+         procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
+ 
+         // if this was the last scale, finish now
+         if (s == 0)
+             break;
+ 
+         // otherwise, upsample the optical flow
+ 
+         // zoom the optical flow for the next finer scale
+         ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
+         ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
+ 
+         // scale the optical flow with the appropriate zoom factor
+         multiply(2, u1s[s - 1], u1s[s - 1]);
+         multiply(2, u2s[s - 1], u2s[s - 1]);
+ 
+     }
+ 
+ }
+ 
+ namespace ocl_tvl1flow
+ {   // Forward declarations of the host-side kernel launchers defined further down in this file.
+     void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy);
+ 
- -    void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, 
- -        oclMat &rho_c, oclMat &p11, oclMat &p12, 
- -        oclMat &p21, oclMat &p22, oclMat &u1, 
++    void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y,
++        oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy,
+         oclMat &grad, oclMat &rho);
+ 
- -    void estimateDualVariables(oclMat &u1, oclMat &u2, 
++    void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
++        oclMat &rho_c, oclMat &p11, oclMat &p12,
++        oclMat &p21, oclMat &p22, oclMat &u1,
+         oclMat &u2, oclMat &error, float l_t, float theta);
+ 
- -            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, 
++    void estimateDualVariables(oclMat &u1, oclMat &u2,
+         oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
+ }
+ 
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
+ {   // Refines the flow (u1, u2) for one pyramid level; u1/u2 are zero-initialised here when u1 is empty.
+     using namespace ocl_tvl1flow;
+     // Stopping threshold for the inner loop: epsilon^2 scaled by the pixel count of this level.
+     const double scaledEpsilon = epsilon * epsilon * I0.size().area();
+ 
+     CV_DbgAssert( I1.size() == I0.size() );
+     CV_DbgAssert( I1.type() == I0.type() );
+     CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
+     CV_DbgAssert( u2.size() == u1.size() );
+     // No incoming estimate for this level: start from an all-zero flow field.
+     if (u1.empty())
+     {
+         u1.create(I0.size(), CV_32FC1);
+         u1.setTo(Scalar::all(0));
+ 
+         u2.create(I0.size(), CV_32FC1);
+         u2.setTo(Scalar::all(0));
+     }
+     // All work matrices below are top-left sub-rectangles of the member *_buf buffers, sized to this level.
+     oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows));
+ 
+     centeredGradient(I1, I1x, I1y);
+ 
+     oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
+ 
+     oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
+     // The four p matrices are reset to zero at every level.
+     oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
+     oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
+     p11.setTo(Scalar::all(0));
+     p12.setTo(Scalar::all(0));
+     p21.setTo(Scalar::all(0));
+     p22.setTo(Scalar::all(0));
+ 
+     oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));
+     // Loop-invariant scalar factors handed to the kernels.
+     const float l_t = static_cast<float>(lambda * theta);
+     const float taut = static_cast<float>(tau / theta);
+     // Outer loop: `warps` warping passes; inner loop: up to `iterations` update steps each.
+     for (int warpings = 0; warpings < warps; ++warpings)
+     {
+         warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
+ 
+         double error = numeric_limits<double>::max();
+         for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
+         {
- -    string kernelName = "centeredGradientKernel";
++            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
+                 u1, u2, diff, l_t, static_cast<float>(theta));
+             // The sum over `diff` is the convergence measure compared against scaledEpsilon.
+             error = ocl::sum(diff)[0];
+ 
+             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
+ 
+         }
+     }
+ 
+ }
+ 
+ // Drops every pyramid level and releases all scratch buffers held by the object.
+ void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
+ {
+     // Scratch buffers: image gradients, warped images, and per-pixel work planes.
+     I1x_buf.release();
+     I1y_buf.release();
+     I1w_buf.release();
+     I1wx_buf.release();
+     I1wy_buf.release();
+     grad_buf.release();
+     rho_c_buf.release();
+ 
+     // The four p planes.
+     p11_buf.release();
+     p12_buf.release();
+     p21_buf.release();
+     p22_buf.release();
+ 
+     // Error / norm accumulators.
+     diff_buf.release();
+     norm_buf.release();
+ 
+     // Image and flow pyramids.
+     I0s.clear();
+     I1s.clear();
+     u1s.clear();
+     u2s.clear();
+ }
+ 
+ void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
+ {   // Launches "centeredGradientKernel" from the tvl1flow program over a src.cols x src.rows global range.
+     Context  *clCxt = src.clCxt;
+     size_t localThreads[3] = {32, 8, 1};
+     size_t globalThreads[3] = {src.cols, src.rows, 1};
+     // Steps are divided by the element size before being handed to the kernel.
+     int srcElementSize = src.elemSize();
+     int src_step = src.step/srcElementSize;
+     // Only dx's step is passed to the kernel; presumably dy shares the same pitch -- TODO confirm.
+     int dElememntSize = dx.elemSize();
+     int dx_step = dx.step/dElememntSize;
+ 
- -    size_t globalThread[] = 
++    String kernelName = "centeredGradientKernel";
+     vector< pair<size_t, const void *> > args;
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&src_step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step));
+     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);
+ 
+ }
+ 
+ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
+ {   // Launches "estimateDualVariablesKernel" from the tvl1flow program over a u1.cols x u1.rows global range.
+     Context *clCxt = u1.clCxt;
+ 
+     size_t localThread[] = {32, 8, 1};
- -        u1.cols, 
++    size_t globalThread[] =
+     {
- -    string kernelName = "estimateDualVariablesKernel";
++        u1.cols,
+         u1.rows,
+         1
+     };
+     // Steps are divided by the element size before being handed to the kernel.
+     int u1_element_size = u1.elemSize();
+     int u1_step = u1.step/u1_element_size;
+ 
+     int u2_element_size = u2.elemSize();
+     int u2_step = u2.step/u2_element_size;
+     // Only p11's step is passed; presumably p12/p21/p22 share the same pitch -- TODO confirm.
+     int p11_element_size = p11.elemSize();
+     int p11_step = p11.step/p11_element_size;
+     // Split each matrix offset into a row index (offset / step) and a column index ((offset % step) / elemSize).
+     int u1_offset_y = u1.offset/u1.step;
+     int u1_offset_x = u1.offset%u1.step;
+     u1_offset_x = u1_offset_x/u1.elemSize();
+ 
+     int u2_offset_y = u2.offset/u2.step;
+     int u2_offset_x = u2.offset%u2.step;
+     u2_offset_x = u2_offset_x/u2.elemSize();
+ 
- -void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, 
- -    oclMat &rho_c, oclMat &p11, oclMat &p12, 
- -    oclMat &p21, oclMat &p22, oclMat &u1, 
++    String kernelName = "estimateDualVariablesKernel";
+     vector< pair<size_t, const void *> > args;
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+     args.push_back( make_pair( sizeof(cl_float), (void*)&taut));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+ 
+     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+ }
+ 
- -    size_t globalThread[] = 
++void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
++    oclMat &rho_c, oclMat &p11, oclMat &p12,
++    oclMat &p21, oclMat &p22, oclMat &u1,
+     oclMat &u2, oclMat &error, float l_t, float theta)
+ {   // Launches "estimateUKernel" from the tvl1flow program over an I1wx.cols x I1wx.rows global range.
+     Context* clCxt = I1wx.clCxt;
+ 
+     size_t localThread[] = {32, 8, 1};
- -        I1wx.cols, 
++    size_t globalThread[] =
+     {
- -    string kernelName = "estimateUKernel";
++        I1wx.cols,
+         I1wx.rows,
+         1
+     };
+     // Steps are divided by the element size; only I1wx's step is passed for the non-flow buffers -- presumably they share its pitch, TODO confirm.
+     int I1wx_element_size = I1wx.elemSize();
+     int I1wx_step = I1wx.step/I1wx_element_size;
+ 
+     int u1_element_size = u1.elemSize();
+     int u1_step = u1.step/u1_element_size;
+ 
+     int u2_element_size = u2.elemSize();
+     int u2_step = u2.step/u2_element_size;
+     // Split each flow matrix offset into a row index (offset / step) and a column index ((offset % step) / elemSize).
+     int u1_offset_y = u1.offset/u1.step;
+     int u1_offset_x = u1.offset%u1.step;
+     u1_offset_x = u1_offset_x/u1.elemSize();
+ 
+     int u2_offset_y = u2.offset/u2.step;
+     int u2_offset_x = u2.offset%u2.step;
+     u2_offset_x = u2_offset_x/u2.elemSize();
+ 
- -    
++    String kernelName = "estimateUKernel";
+     vector< pair<size_t, const void *> > args;
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data));
+     args.push_back( make_pair( sizeof(cl_float), (void*)&l_t));
+     args.push_back( make_pair( sizeof(cl_float), (void*)&theta));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+ 
+     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+ }
+ 
+ void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
+ {   // Launches "warpBackwardKernel" from the tvl1flow program; asserts that the context supports image2d.
+     Context* clCxt = I0.clCxt;
+     const bool isImgSupported = support_image2d(clCxt);
- -    size_t globalThread[] = 
++
+     CV_Assert(isImgSupported);
+     // Steps are divided by the element size before being handed to the kernel.
+     int u1ElementSize = u1.elemSize();
+     int u1Step = u1.step/u1ElementSize;
+ 
+     int u2ElementSize = u2.elemSize();
+     int u2Step = u2.step/u2ElementSize;
+ 
+     int I0ElementSize = I0.elemSize();
+     int I0Step = I0.step/I0ElementSize;
+     // Only I1w's step is passed for the output buffers; presumably I1wx/I1wy/grad/rho share its pitch -- TODO confirm.
+     int I1w_element_size = I1w.elemSize();
+     int I1w_step = I1w.step/I1w_element_size;
+     // Split each flow matrix offset into a row index (offset / step) and a column index ((offset % step) / elemSize).
+     int u1_offset_y = u1.offset/u1.step;
+     int u1_offset_x = u1.offset%u1.step;
+     u1_offset_x = u1_offset_x/u1.elemSize();
+ 
+     int u2_offset_y = u2.offset/u2.step;
+     int u2_offset_x = u2.offset%u2.step;
+     u2_offset_x = u2_offset_x/u2.elemSize();
+ 
+     size_t localThread[] = {32, 8, 1};
- -        I0.cols, 
++    size_t globalThread[] =
+     {
- -    string kernelName = "warpBackwardKernel";
++        I0.cols,
+         I0.rows,
+         1
+     };
+     // NOTE(review): the three cl_mem handles returned by bindTexture are never released in this function -- confirm ownership, possible leak per call.
+     cl_mem I1_tex;
+     cl_mem I1x_tex;
+     cl_mem I1y_tex;
+     I1_tex = bindTexture(I1);
+     I1x_tex = bindTexture(I1x);
+     I1y_tex = bindTexture(I1y);
+ 
- -}
++    String kernelName = "warpBackwardKernel";
+     vector< pair<size_t, const void *> > args;
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
+     args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
+     args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+ 
+     openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
++}
diff --cc modules/ocl/test/precomp.hpp

index 56efdab,090c9b3..72d5089
--- 1/modules/ocl/test/precomp.hpp
--- 2/modules/ocl/test/precomp.hpp
+++ b/modules/ocl/test/precomp.hpp
@@@ -61,16 -61,17 +61,14 @@@
   #include <iterator>
   #include <string>
   #include <cstdarg>
- -#include "cvconfig.h"
- -#include "opencv2/core/core.hpp"
- -#include "opencv2/highgui/highgui.hpp"
- -//#include "opencv2/calib3d/calib3d.hpp"
- -#include "opencv2/imgproc/imgproc.hpp"
- -#include "opencv2/video/video.hpp"
- -#include "opencv2/ts/ts.hpp"
- -#include "opencv2/ocl/ocl.hpp"
+ +#include "opencv2/ts.hpp"
+ +#include "opencv2/highgui.hpp"
+ +#include "opencv2/imgproc.hpp"
+ +#include "opencv2/video.hpp"
+ +#include "opencv2/ocl.hpp"
   
   #include "utility.hpp"
- #include "interpolation.hpp"
- -//#include "add_test_info.h"
   
- -#endif
+ +#include "opencv2/core/private.hpp"
   
- 
+ +#endif
diff --cc modules/ocl/test/test_brute_force_matcher.cpp
Simple merge
diff --cc modules/ocl/test/test_color.cpp
Simple merge
diff --cc modules/ocl/test/test_fft.cpp
Simple merge
diff --cc modules/ocl/test/test_haar.cpp

index 905160c,96f7211..652109d
--- 1/modules/ocl/test/test_haar.cpp
--- 2/modules/ocl/test/test_haar.cpp
+++ b/modules/ocl/test/test_haar.cpp
@@@ -105,23 -98,11 +98,11 @@@ TEST_P(Haar, FaceDetect
           return ;
       }
   
-     //int i = 0;
-     //double t = 0;
       vector<Rect> faces, oclfaces;
   
-     // const static Scalar colors[] =  { CV_RGB(0, 0, 255),
-     //                                   CV_RGB(0, 128, 255),
-     //                                   CV_RGB(0, 255, 255),
-     //                                   CV_RGB(0, 255, 0),
-     //                                   CV_RGB(255, 128, 0),
-     //                                   CV_RGB(255, 255, 0),
-     //                                   CV_RGB(255, 0, 0),
-     //                                   CV_RGB(255, 0, 255)
-     //                                 } ;
- 
       Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
       MemStorage storage(cvCreateMemStorage(0));
- -    cvtColor( img, gray, CV_BGR2GRAY );
+ +    cvtColor( img, gray, COLOR_BGR2GRAY );
       resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
       equalizeHist( smallImg, smallImg );
   
diff --cc modules/ocl/test/test_hog.cpp
Simple merge
diff --cc modules/ocl/test/test_imgproc.cpp
Simple merge
diff --cc modules/ocl/test/test_moments.cpp
Simple merge
diff --cc modules/ocl/test/test_pyrup.cpp
Simple merge
diff --cc modules/ocl/test/utility.hpp
Simple merge
diff --cc modules/superres/perf/perf_precomp.hpp

index c15ed2f,f052037..f92d8e4
--- 1/modules/superres/perf/perf_precomp.hpp
--- 2/modules/superres/perf/perf_precomp.hpp
+++ b/modules/superres/perf/perf_precomp.hpp
@@@ -51,11 -51,16 +51,11 @@@
   #ifndef __OPENCV_PERF_PRECOMP_HPP__
   #define __OPENCV_PERF_PRECOMP_HPP__
   
- -#ifdef HAVE_CVCONFIG_H
- -#include "cvconfig.h"
- -#endif
- -
- -#include "opencv2/ts/ts.hpp"
- -#include "opencv2/ts/gpu_perf.hpp"
- -
- -#include "opencv2/core/core.hpp"
+ +#include "opencv2/core.hpp"
   #include "opencv2/core/gpumat.hpp"
- #include "opencv2/ts/ts_perf.hpp"
- -#include "opencv2/superres/superres.hpp"
++#include "opencv2/ts.hpp"
+ +#include "opencv2/ts/gpu_perf.hpp"
+ +#include "opencv2/superres.hpp"
   #include "opencv2/superres/optical_flow.hpp"
   
   #ifdef GTEST_CREATE_SHARED_LIBRARY
diff --cc modules/ts/include/opencv2/ts.hpp

index b0416da,fcef589..84fb7b0
--- 1/modules/ts/include/opencv2/ts.hpp
--- 2/modules/ts/include/opencv2/ts/ts.hpp
+++ b/modules/ts/include/opencv2/ts.hpp
@@@ -531,8 -554,21 +531,9 @@@ namespace cvtes
   CV_EXPORTS void fillGradient(Mat& img, int delta = 5);
   CV_EXPORTS void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
   
+ CV_EXPORTS void printVersionInfo(bool useStdOut = true);
   } //namespace cvtest
   
- -// fills c with zeros
- -CV_EXPORTS void cvTsZero( CvMat* c, const CvMat* mask=0 );
- -
- -// copies a to b (whole matrix or only the selected region)
- -CV_EXPORTS void cvTsCopy( const CvMat* a, CvMat* b, const CvMat* mask=0 );
- -
- -// converts one array to another
- -CV_EXPORTS void  cvTsConvert( const CvMat* src, CvMat* dst );
- -
- -CV_EXPORTS void cvTsGEMM( const CvMat* a, const CvMat* b, double alpha,
- -                         const CvMat* c, double beta, CvMat* d, int flags );
- -
   #define CV_TEST_MAIN(resourcesubdir) \
   int main(int argc, char **argv) \
   { \
diff --cc modules/ts/include/opencv2/ts/ts_perf.hpp
Simple merge
diff --cc modules/ts/src/ts_func.cpp

index 4b48cd9,1d636e6..0f3751e
--- 1/modules/ts/src/ts_func.cpp
--- 2/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@@ -2937,4 -2934,42 +2937,30 @@@ MatComparator::operator()(const char* e
       << "'" << expr2 << "': " << MatPart(m2part, border > 0 ? &loc : 0) << ".\n";
   }
   
- -} //namespace cvtest
- -
- -void cvTsConvert( const CvMat* src, CvMat* dst )
- -{
- -    Mat _src = cvarrToMat(src), _dst = cvarrToMat(dst);
- -    cvtest::convert(_src, _dst, _dst.depth());
- -}
- -
- -void cvTsZero( CvMat* dst, const CvMat* mask )
- -{
- -    Mat _dst = cvarrToMat(dst), _mask = mask ? cvarrToMat(mask) : Mat();
- -    cvtest::set(_dst, Scalar::all(0), _mask);
+ void printVersionInfo(bool useStdOut)
+ {
+     ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION);
+     if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
+ 
+     std::string buildInfo( cv::getBuildInformation() );
+ 
+     size_t pos1 = buildInfo.find("Version control");
+     size_t pos2 = buildInfo.find("\n", pos1);\
+     if(pos1 != std::string::npos && pos2 != std::string::npos)
+     {
+         std::string ver( buildInfo.substr(pos1, pos2-pos1) );
+         ::testing::Test::RecordProperty("Version_control", ver);
+         if(useStdOut) std::cout << ver << std::endl;
+     }
+ 
+     pos1 = buildInfo.find("inner version");
+     pos2 = buildInfo.find("\n", pos1);\
+     if(pos1 != std::string::npos && pos2 != std::string::npos)
+     {
+         std::string ver( buildInfo.substr(pos1, pos2-pos1) );
+         ::testing::Test::RecordProperty("inner_version", ver);
+         if(useStdOut) std::cout << ver << std::endl;
+     }
+ }
+ 
   }
author	Roman Donchenko <roman.donchenko@itseez.com>
	Tue, 21 May 2013 10:35:24 +0000 (14:35 +0400)
committer	Roman Donchenko <roman.donchenko@itseez.com>
	Tue, 21 May 2013 13:19:23 +0000 (17:19 +0400)
		1	2
3rdparty/libjasper/CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/OpenCVDetectAndroidSDK.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/OpenCVDetectPython.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
modules/calib3d/doc/camera_calibration_and_3d_reconstruction.rst	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/src/matmul.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/include/opencv2/highgui/highgui_c.h	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/src/grfmt_jpeg.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/src/grfmt_png.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst	patch \|	diff1 \|	diff2 \|	blob \| history
modules/imgproc/src/floodfill.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/imgproc/src/imgwarp.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/objdetect/src/cascadedetect.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/include/opencv2/ocl.hpp	patch \|	diff1 \|	\|	blob \| history
modules/ocl/include/opencv2/ocl/ocl.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/arithm.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/brute_force_matcher.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/gemm.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/haar.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/imgproc.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/initialization.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/matrix_operations.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/mcwutil.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/moments.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/arithm_bitwise_binary.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/haarobjectdetect.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/moments.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/stereobp.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/stereo_csbp.cpp	patch \|	\|	diff2 \|	blob \| history
modules/ocl/src/stereobm.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/tvl1flow.cpp	patch \|	\|	diff2 \|	blob \| history
modules/ocl/test/precomp.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_brute_force_matcher.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_color.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_fft.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_haar.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_hog.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_imgproc.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_moments.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_pyrup.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/utility.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/superres/perf/perf_precomp.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/include/opencv2/ts.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/include/opencv2/ts/ts_perf.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/src/ts_func.cpp	patch \|	diff1 \|	diff2 \|	blob \| history