# Usage:
# ocv_glob_module_sources(<extra sources&headers in the same format as used in ocv_set_module_sources>)
macro(ocv_glob_module_sources)
- file(GLOB_RECURSE lib_srcs "src/*.cpp")
- file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h")
- file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
+ file(GLOB lib_srcs "src/*.cpp")
+ file(GLOB lib_int_hdrs "src/*.hpp" "src/*.h")
+ file(GLOB lib_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
- file(GLOB cl_kernels "src/opencl/*.cl")
+ file(GLOB lib_device_srcs "src/cuda/*.cu")
+ set(device_objs "")
+ set(lib_device_hdrs "")
- source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
- source_group("Include" FILES ${lib_hdrs})
- source_group("Include\\detail" FILES ${lib_hdrs_detail})
+ if (HAVE_CUDA AND lib_device_srcs)
+ ocv_include_directories(${CUDA_INCLUDE_DIRS})
+ file(GLOB lib_device_hdrs "src/cuda/*.hpp")
+
+ ocv_cuda_compile(device_objs ${lib_device_srcs} ${lib_device_hdrs})
+ source_group("Src\\Cuda" FILES ${lib_device_srcs} ${lib_device_hdrs})
+ endif()
+
+  file(GLOB cl_kernels "src/opencl/*.cl")
+
+ if(HAVE_OPENCL AND cl_kernels)
+ ocv_include_directories(${OPENCL_INCLUDE_DIRS})
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp"
+ COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
+ DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
+ source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
+ list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
+ endif()
+
- ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail} SOURCES ${lib_srcs} ${lib_int_hdrs})
+ ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
+ SOURCES ${lib_srcs} ${lib_int_hdrs} ${device_objs} ${lib_device_srcs} ${lib_device_hdrs})
+
+ source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+ source_group("Include" FILES ${lib_hdrs})
+ source_group("Include\\detail" FILES ${lib_hdrs_detail})
endmacro()
# creates OpenCV module in current folder
if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
+ if (HAVE_CUDA)
+ target_link_libraries(${the_module} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
+ endif()
+ if(HAVE_OPENCL AND OPENCL_LIBRARIES)
+ target_link_libraries(${the_module} ${OPENCL_LIBRARIES})
+ endif()
endif()
add_dependencies(opencv_modules ${the_module})
TEST(Core_SVBkSb, accuracy) { Core_SVBkSbTest test; test.safe_run(); }
TEST(Core_Trace, accuracy) { Core_TraceTest test; test.safe_run(); }
TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); }
+TEST(Core_Phase, accuracy) { Core_PhaseTest test; test.safe_run(); }
+
+ TEST(Core_SVD, flt)
+ {
+ float a[] = {
+ 1.23377746e+011f, -7.05490125e+010f, -4.18380882e+010f, -11693456.f,
+ -39091328.f, 77492224.f, -7.05490125e+010f, 2.36211143e+011f,
+ -3.51093473e+010f, 70773408.f, -4.83386156e+005f, -129560368.f,
+ -4.18380882e+010f, -3.51093473e+010f, 9.25311222e+010f, -49052424.f,
+ 43922752.f, 12176842.f, -11693456.f, 70773408.f, -49052424.f, 8.40836094e+004f,
+ 5.17475293e+003f, -1.16122949e+004f, -39091328.f, -4.83386156e+005f,
+ 43922752.f, 5.17475293e+003f, 5.16047969e+004f, 5.68887842e+003f, 77492224.f,
+ -129560368.f, 12176842.f, -1.16122949e+004f, 5.68887842e+003f,
+ 1.28060578e+005f
+ };
+
+ float b[] = {
+ 283751232.f, 2.61604198e+009f, -745033216.f, 2.31125625e+005f,
+ -4.52429188e+005f, -1.37596525e+006f
+ };
+
+ Mat A(6, 6, CV_32F, a);
+ Mat B(6, 1, CV_32F, b);
+ Mat X, B1;
+ solve(A, B, X, DECOMP_SVD);
+ B1 = A*X;
+ EXPECT_LE(norm(B1, B, NORM_L2 + NORM_RELATIVE), FLT_EPSILON*10);
+ }
+
+
// TODO: eigenvv, invsqrt, cbrt, fastarctan, (round, floor, ceil(?)),
- ////////////////////////////////// SURF //////////////////////////////////////////
-
- class CV_EXPORTS SURF_GPU
- {
- public:
- enum KeypointLayout
- {
- X_ROW = 0,
- Y_ROW,
- LAPLACIAN_ROW,
- OCTAVE_ROW,
- SIZE_ROW,
- ANGLE_ROW,
- HESSIAN_ROW,
- ROWS_COUNT
- };
-
- //! the default constructor
- SURF_GPU();
- //! the full constructor taking all the necessary parameters
- explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
- int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
-
- //! returns the descriptor size in float's (64 or 128)
- int descriptorSize() const;
-
- //! upload host keypoints to device memory
- static void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
- //! download keypoints from device to host memory
- static void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints);
-
- //! download descriptors from device to host memory
- static void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors);
-
- //! finds the keypoints using fast hessian detector used in SURF
- //! supports CV_8UC1 images
- //! keypoints will have nFeature cols and 6 rows
- //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
- //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
- //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
- //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
- //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
- //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
- //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
- void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints);
- //! finds the keypoints and computes their descriptors.
- //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
- void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
- void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
- bool useProvidedKeypoints = false);
-
- void releaseMemory();
-
- // SURF parameters
- double hessianThreshold;
- int nOctaves;
- int nOctaveLayers;
- bool extended;
- bool upright;
-
- //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
- float keypointsRatio;
-
- GpuMat sum, mask1, maskSum, intBuffer;
-
- GpuMat det, trace;
-
- GpuMat maxPosBuffer;
- };
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_HPP__
+#define __OPENCV_GPU_HPP__
+
+#ifndef SKIP_INCLUDES
+#include <vector>
+#include <memory>
+#include <iosfwd>
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/features2d.hpp"
+
+namespace cv { namespace gpu {
+//////////////////////////////// Filter Engine ////////////////////////////////
+
+/*!
+The Base Class for 1D or Row-wise Filters
+
+This is the base class for linear or non-linear filters that process 1D data.
+In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseRowFilter_GPU
+{
+public:
+ BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+ virtual ~BaseRowFilter_GPU() {}
+ virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+ int ksize, anchor;
+};
+
+/*!
+The Base Class for Column-wise Filters
+
+This is the base class for linear or non-linear filters that process columns of 2D arrays.
+Such filters are used for the "vertical" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseColumnFilter_GPU
+{
+public:
+ BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+ virtual ~BaseColumnFilter_GPU() {}
+ virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+ int ksize, anchor;
+};
+
+/*!
+The Base Class for Non-Separable 2D Filters.
+
+This is the base class for linear or non-linear 2D filters.
+*/
+class CV_EXPORTS BaseFilter_GPU
+{
+public:
+ BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
+ virtual ~BaseFilter_GPU() {}
+ virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+ Size ksize;
+ Point anchor;
+};
+
+/*!
+The Base Class for Filter Engine.
+
+The class can be used to apply an arbitrary filtering operation to an image.
+It contains all the necessary intermediate buffers.
+*/
+class CV_EXPORTS FilterEngine_GPU
+{
+public:
+ virtual ~FilterEngine_GPU() {}
+
+ virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
+};
+
+//! returns the non-separable filter engine with the specified filter
+CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
+
+//! returns the separable filter engine with the specified filters
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+ const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+ const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
+
+//! returns horizontal 1D box filter
+//! supports only CV_8UC1 source type and CV_32FC1 sum type
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
+
+//! returns vertical 1D box filter
+//! supports only CV_8UC1 sum type and CV_32FC1 dst type
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
+
+//! returns 2D box filter
+//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
+
+//! returns box filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
+ const Point& anchor = Point(-1,-1));
+
+//! returns 2D morphological filter
+//! only MORPH_ERODE and MORPH_DILATE are supported
+//! supports CV_8UC1 and CV_8UC4 types
+//! kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
+CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
+ Point anchor=Point(-1,-1));
+
+//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
+ const Point& anchor = Point(-1,-1), int iterations = 1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
+ const Point& anchor = Point(-1,-1), int iterations = 1);
+
+//! returns 2D filter with the specified kernel
+//! supports CV_8U, CV_16U and CV_32F one and four channel image
+CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+//! returns the non-separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
+ Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
+
+//! returns the primitive row filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType;
+//! otherwise the OpenCV version is called.
+//! NPP supports only BORDER_CONSTANT border type.
+//! OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
+ int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the primitive column filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType;
+//! otherwise the OpenCV version is called.
+//! NPP supports only BORDER_CONSTANT border type.
+//! OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
+ int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+ const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+ int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+ const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+ int columnBorderType = -1);
+
+//! returns filter engine for the generalized Sobel operator
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns the Gaussian filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns maximum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! returns minimum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! smooths the image using the normalized box filter
+//! supports CV_8UC1, CV_8UC4 types
+CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
+
+//! a synonym for normalized box filter
+static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
+{
+ boxFilter(src, dst, -1, ksize, anchor, stream);
+}
+
+//! erodes the image (applies the local minimum operator)
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+ Point anchor = Point(-1, -1), int iterations = 1,
+ Stream& stream = Stream::Null());
+
+//! dilates the image (applies the local maximum operator)
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+ Point anchor = Point(-1, -1), int iterations = 1,
+ Stream& stream = Stream::Null());
+
+//! applies an advanced morphological operation to the image
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
+ Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+//! applies non-separable 2D linear filter to the image
+CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+//! applies separable 2D linear filter to the image
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
+ Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
+ Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
+ Stream& stream = Stream::Null());
+
+//! applies generalized Sobel operator to the image
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies the vertical or horizontal Scharr operator to the image
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! smooths the image using Gaussian filter.
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies Laplacian operator to the image
+//! supports only ksize = 1 and ksize = 3
+CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
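
For illustration only (not part of this patch), a minimal sketch of the convenience filtering functions above. It assumes a CUDA-capable device, an existing input.png, and that this header installs as opencv2/gpu/gpu.hpp; none of that is guaranteed by the declarations themselves.

    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/gpu/gpu.hpp>   // assumed install path of this header

    int main()
    {
        // load an 8-bit grayscale image and upload it to the device
        cv::Mat host = cv::imread("input.png", 0);
        cv::gpu::GpuMat src(host), blurred, dx;

        cv::gpu::blur(src, blurred, cv::Size(5, 5));   // normalized box filter
        cv::gpu::Sobel(blurred, dx, CV_16S, 1, 0, 3);  // x derivative, 3x3 kernel

        cv::Mat result(dx);                            // download back to the host
        return 0;
    }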
+
+
+////////////////////////////// Arithmetics ///////////////////////////////////
+
+//! implements generalized matrix product algorithm GEMM from BLAS
+CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha,
+ const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null());
+
+//! transposes the matrix
+//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc)
+CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! reverses the order of the rows, columns or both in a matrix
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth
+CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null());
+
+//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+//! destination array will have the same depth as lut and the same number of channels as source
+//! supports CV_8UC1, CV_8UC3 types
+CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! makes multi-channel array out of several single-channel arrays
+CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! makes multi-channel array out of several single-channel arrays
+CV_EXPORTS void merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! copies each plane of a multi-channel array to a dedicated array
+CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null());
+
+//! copies each plane of a multi-channel array to a dedicated array
+CV_EXPORTS void split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null());
+
+//! computes magnitude of complex (x(i).re, x(i).im) vector
+//! supports only CV_32FC2 type
+CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes squared magnitude of complex (x(i).re, x(i).im) vector
+//! supports only CV_32FC2 type
+CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes magnitude of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes squared magnitude of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null());
+
+//! computes angle (angle(i)) of each (x(i), y(i)) vector
+//! supports only floating-point source
+CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
+
+//! converts Cartesian coordinates to polar
+//! supports only floating-point source
+CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null());
+
+//! converts polar coordinates to Cartesian
+//! supports only floating-point source
+CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null());
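
A fragment (same assumed setup as the sketch above) showing how cartToPolar can be combined with Sobel to get per-pixel gradient magnitude and orientation; the CV_32F derivative depth is an assumption chosen to satisfy the floating-point requirement noted above.

    cv::gpu::GpuMat img;              // CV_8UC1 input, already on the device
    cv::gpu::GpuMat dx, dy, mag, ang;
    cv::gpu::Sobel(img, dx, CV_32F, 1, 0);
    cv::gpu::Sobel(img, dy, CV_32F, 0, 1);
    cv::gpu::cartToPolar(dx, dy, mag, ang, true);   // orientation in degrees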
+
+//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
+CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0,
+ int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat());
+CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b,
+ int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf);
+
+
+//////////////////////////// Per-element operations ////////////////////////////////////
+
+//! adds one matrix to another (c = a + b)
+CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+//! adds scalar to a matrix (c = a + s)
+CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+
+//! subtracts one matrix from another (c = a - b)
+CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+//! subtracts scalar from a matrix (c = a - s)
+CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes element-wise weighted product of the two arrays (c = scale * a * b)
+CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes element-wise weighted product of a matrix and a scalar (c = scale * a * s)
+CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes element-wise weighted quotient of the two arrays (c = scale * a / b)
+CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes element-wise weighted quotient of matrix and scalar (c = scale * a / s)
+CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
+//! computes element-wise weighted reciprocal of an array (dst = scale/src2)
+CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null());
+
+//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma)
+CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst,
+ int dtype = -1, Stream& stream = Stream::Null());
+
+//! adds scaled array to another one (dst = alpha*src1 + src2)
+static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null())
+{
+ addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream);
+}
+
+//! computes element-wise absolute difference of two arrays (c = abs(a - b))
+CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
+//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null());
+
+//! computes absolute value of each matrix element
+//! supports CV_16S and CV_32F depth
+CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes square of each pixel in an image
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes square root of each pixel in an image
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes exponent of each matrix element (b = e**a)
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
+
+//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
+CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
+
+//! computes power of each matrix element:
+//!    dst(i,j) = pow(src(i,j), power),       if src.type() is integer
+//!    dst(i,j) = pow(fabs(src(i,j)), power), otherwise
+//! supports all types except depth == CV_64F
+CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! compares elements of two arrays (c = a <cmpop> b)
+CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
+CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null());
+
+//! performs per-elements bit-wise inversion
+CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise disjunction of two arrays
+CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise disjunction of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise conjunction of two arrays
+CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise conjunction of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! calculates per-element bit-wise "exclusive or" operation
+CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null());
+//! calculates per-element bit-wise "exclusive or" of array and scalar
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! pixel by pixel right shift of an image by a constant value
+//! supports 1, 3 and 4 channel images with integer elements
+CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! pixel by pixel left shift of an image by a constant value
+//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
+CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element minimum of two arrays (dst = min(src1, src2))
+CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element minimum of array and scalar (dst = min(src1, src2))
+CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element maximum of two arrays (dst = max(src1, src2))
+CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! computes per-element maximum of array and scalar (dst = max(src1, src2))
+CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null());
+
+enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL,
+ ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL};
+
+//! Composites two images using alpha opacity values contained in each image
+//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types
+CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null());
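
A fragment sketching alpha composition with the declaration above; the CV_8UC4 inputs of equal size are assumptions.

    cv::gpu::GpuMat fg, bg;   // CV_8UC4 images carrying alpha channels
    cv::gpu::GpuMat out;
    cv::gpu::alphaComp(fg, bg, out, cv::gpu::ALPHA_OVER);   // "fg over bg"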
+
+
+////////////////////////////// Image processing //////////////////////////////
+
+//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
+//! supports only CV_32FC1 map type
+CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
+ int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(),
+ Stream& stream = Stream::Null());
+
+//! Does mean shift filtering on GPU.
+CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+ Stream& stream = Stream::Null());
+
+//! Does mean shift procedure on GPU.
+CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
+ Stream& stream = Stream::Null());
+
+//! Does mean shift segmentation with elimination of small regions.
+CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV.
+//! Supported types of input disparity: CV_8U, CV_16S.
+//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
+CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null());
+
+//! Reprojects disparity image to 3D space.
+//! Supports CV_8U and CV_16S types of input disparity.
+//! The output is a 3- or 4-channel floating-point matrix.
+//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map.
+//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify.
+CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null());
+
+//! converts image from one color space to another
+CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null());
+
+enum
+{
+    // Bayer Demosaicing (Malvar, He, and Cutler)
+    COLOR_BayerBG2BGR_MHT = 256,
+    COLOR_BayerGB2BGR_MHT = 257,
+    COLOR_BayerRG2BGR_MHT = 258,
+    COLOR_BayerGR2BGR_MHT = 259,
+
+    COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT,
+    COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT,
+    COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT,
+    COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT,
+
+    COLOR_BayerBG2GRAY_MHT = 260,
+    COLOR_BayerGB2GRAY_MHT = 261,
+    COLOR_BayerRG2GRAY_MHT = 262,
+    COLOR_BayerGR2GRAY_MHT = 263
+};
+CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null());
+
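An illustrative fragment for the MHT demosaicing entry point above; the BGGR sensor layout and the CV_8UC1 raw frame are assumptions.

    cv::gpu::GpuMat bayer;   // CV_8UC1 raw sensor frame, BGGR pattern assumed
    cv::gpu::GpuMat bgr;
    cv::gpu::demosaicing(bayer, bgr, cv::gpu::COLOR_BayerBG2BGR_MHT, 3);
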
+//! swap channels
+//! dstOrder - Integer array describing how channel values are permuted. The n-th entry
+//! of the array contains the number of the channel that is stored in the n-th channel of
+//! the output image. E.g., given an RGBA image, dstOrder = [3,2,1,0] converts it to ABGR
+//! channel order.
+CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null());
+
+//! Routines for correcting image color gamma
+CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null());
+
+//! applies fixed threshold to the image
+CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());
+
+//! resizes the image
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA
+CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! warps the image using affine transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+ int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! warps the image using perspective transformation
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,
+ int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null());
+
+CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null());
+
+//! builds plane warping maps
+CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale,
+ GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds cylindrical warping maps
+CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+ GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! builds spherical warping maps
+CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
+ GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());
+
+//! rotates an image around the origin (0,0) and then shifts it
+//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth
+CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
+ int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
+
+//! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
+ const Scalar& value = Scalar(), Stream& stream = Stream::Null());
+
+//! computes the integral image
+//! sum will have CV_32S type, but will contain unsigned int values
+//! supports only CV_8UC1 source type
+CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null());
+//! buffered version
+CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null());
+
+//! computes squared integral image
+//! result matrix will have 64F type, but will contain unsigned 64-bit integer values
+//! supports source images of 8UC1 type only
+CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null());
+
+//! computes vertical sum, supports only CV_32FC1 images
+CV_EXPORTS void columnSum(const GpuMat& src, GpuMat& sum);
+
+//! computes the standard deviation of integral images
+//! supports only CV_32SC1 source type and CV_32FC1 sqr type
+//! output will have CV_32FC1 type
+CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
+
+//! computes Harris cornerness criteria at each image pixel
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
+CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
+ int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
+
+//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
+CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
+ int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
+
+//! performs per-element multiplication of two full (not packed) Fourier spectrums
+//! supports 32FC2 matrices only (interleaved format)
+CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null());
+
+//! performs per-element multiplication of two full (not packed) Fourier spectrums
+//! supports 32FC2 matrices only (interleaved format)
+CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null());
+
+//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of a floating point matrix.
+//! Param dft_size is the size of the DFT transform.
+//!
+//! If the source matrix is not continuous, an additional copy will be made,
+//! so to avoid copying ensure the source matrix is continuous. If you want to use
+//! a preallocated output, ensure it is continuous too; otherwise it will be reallocated.
+//!
+//! Because it is implemented via CUFFT, a real-to-complex transform result contains only the
+//! non-redundant values in CUFFT's format, so the result of such a transform cannot be retrieved
+//! as a full complex matrix.
+//!
+//! For a complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
+CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
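
A fragment showing a forward transform with the declaration above; a continuous CV_32FC1 source is assumed so that no extra copy is made.

    cv::gpu::GpuMat src;     // CV_32FC1, continuous
    cv::gpu::GpuMat spect;   // non-redundant values in CUFFT's packed format
    cv::gpu::dft(src, spect, src.size());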
+
+struct CV_EXPORTS ConvolveBuf
+{
+ Size result_size;
+ Size block_size;
+ Size user_block_size;
+ Size dft_size;
+ int spect_len;
+
+ GpuMat image_spect, templ_spect, result_spect;
+ GpuMat image_block, templ_block, result_data;
+
+ void create(Size image_size, Size templ_size);
+ static Size estimateBlockSize(Size result_size, Size templ_size);
+};
+
+
+//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
+//! supports source images of 32FC1 type only
+//! result matrix will have 32FC1 type
+CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
+CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
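
A fragment sketching the buffered convolve overload; keeping one ConvolveBuf alive across calls avoids repeated buffer setup, and user_block_size is an optional tuning hint (the 512x512 value here is an arbitrary assumption).

    cv::gpu::GpuMat image, templ, result;        // CV_32FC1 inputs
    cv::gpu::ConvolveBuf buf;                    // reused across calls
    buf.user_block_size = cv::Size(512, 512);    // optional block-size hint
    cv::gpu::convolve(image, templ, result, /*ccorr=*/true, buf);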
+
+struct CV_EXPORTS MatchTemplateBuf
+{
+ Size user_block_size;
+ GpuMat imagef, templf;
+ std::vector<GpuMat> images;
+ std::vector<GpuMat> image_sums;
+ std::vector<GpuMat> image_sqsums;
+};
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null());
+
+//! computes the proximity map for the raster template and the image where the template is searched for
+CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null());
+
+//! smoothes the source image and downsamples it
+CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! upsamples the source image and then smoothes it
+CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+
+//! performs linear blending of two images
+//! to avoid accuracy errors the sum of weights shouldn't be very close to zero
+CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
+ GpuMat& result, Stream& stream = Stream::Null());
+
+//! Performs bilateral filtering of the passed image
+CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial,
+ int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+//! Brute force non-local means algorithm (slow but universal)
+CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
+
+//! Fast (but approximate) version of the non-local means algorithm, similar to the CPU function (running sums technique)
+class CV_EXPORTS FastNonLocalMeansDenoising
+{
+public:
+ //! Simple method, recommended for grayscale images (though it supports multichannel images)
+ void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+    //! Processes luminance and color components separately
+ void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+
+private:
+
+ GpuMat buffer, extended_src_buffer;
+ GpuMat lab, l, ab;
+};
+
+struct CV_EXPORTS CannyBuf
+{
+ void create(const Size& image_size, int apperture_size = 3);
+ void release();
+
+ GpuMat dx, dy;
+ GpuMat mag;
+ GpuMat map;
+ GpuMat st1, st2;
+ Ptr<FilterEngine_GPU> filterDX, filterDY;
+};
+
+CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
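
A fragment showing the buffered Canny overload, where the caller keeps the CannyBuf alive between frames to avoid reallocations; the 50/150 thresholds are illustrative.

    cv::gpu::GpuMat frame;   // CV_8UC1
    cv::gpu::GpuMat edges;
    cv::gpu::CannyBuf buf;   // reuse across frames
    cv::gpu::Canny(frame, buf, edges, 50.0, 150.0);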
+
+class CV_EXPORTS ImagePyramid
+{
+public:
+ inline ImagePyramid() : nLayers_(0) {}
+ inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null())
+ {
+ build(img, nLayers, stream);
+ }
+
+ void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null());
+
+ void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const;
+
+ inline void release()
+ {
+ layer0_.release();
+ pyramid_.clear();
+ nLayers_ = 0;
+ }
+
+private:
+ GpuMat layer0_;
+ std::vector<GpuMat> pyramid_;
+ int nLayers_;
+};
+
+//! HoughLines
+
+struct HoughLinesBuf
+{
+ GpuMat accum;
+ GpuMat list;
+};
+
+CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
+CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096);
+CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
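
A fragment combining the buffered HoughLines overload with HoughLinesDownload to fetch (rho, theta) pairs back to the host; the 1 pixel / 1 degree resolution and the vote threshold of 100 are illustrative values.

    cv::gpu::GpuMat d_edges;   // CV_8UC1 binary edge map, e.g. from Canny
    cv::gpu::GpuMat d_lines;
    cv::gpu::HoughLinesBuf buf;
    cv::gpu::HoughLines(d_edges, d_lines, buf, 1.0f, (float)(CV_PI / 180.0), 100);
    std::vector<cv::Vec2f> lines;   // (rho, theta) pairs on the host
    cv::gpu::HoughLinesDownload(d_lines, lines);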
+
+//! HoughLinesP
+
+//! finds line segments in the black-and-white image using the probabilistic Hough transform
+CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096);
+
+//! HoughCircles
+
+struct HoughCirclesBuf
+{
+ GpuMat edges;
+ GpuMat accum;
+ GpuMat list;
+ CannyBuf cannyBuf;
+};
+
+CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles);
+
+//! finds arbitrary template in the grayscale image using Generalized Hough Transform
+//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122.
+//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038.
+class CV_EXPORTS GeneralizedHough_GPU : public cv::Algorithm
+{
+public:
+ static Ptr<GeneralizedHough_GPU> create(int method);
+
+ virtual ~GeneralizedHough_GPU();
+
+ //! set template to search
+ void setTemplate(const GpuMat& templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1));
+ void setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter = Point(-1, -1));
+
+ //! find template on image
+ void detect(const GpuMat& image, GpuMat& positions, int cannyThreshold = 100);
+ void detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions);
+
+ void download(const GpuMat& d_positions, OutputArray h_positions, OutputArray h_votes = noArray());
+
+ void release();
+
+protected:
+ virtual void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter) = 0;
+ virtual void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions) = 0;
+ virtual void releaseImpl() = 0;
+
+private:
+ GpuMat edges_;
+ CannyBuf cannyBuf_;
+};
+
+////////////////////////////// Matrix reductions //////////////////////////////
+
+//! computes mean value and standard deviation of all or selected array elements
+//! supports only CV_8UC1 type
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
+//! buffered version
+CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
+
+//! computes norm of array
+//! supports NORM_INF, NORM_L1, NORM_L2
+//! supports all matrices except 64F
+CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);
+CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf);
+CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf);
+
+//! computes norm of the difference between two arrays
+//! supports NORM_INF, NORM_L1, NORM_L2
+//! supports only CV_8UC1 type
+CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);
+
+//! computes sum of array elements
+//! supports only single channel images
+CV_EXPORTS Scalar sum(const GpuMat& src);
+CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! computes sum of array elements absolute values
+//! supports only single channel images
+CV_EXPORTS Scalar absSum(const GpuMat& src);
+CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! computes squared sum of array elements
+//! supports only single channel images
+CV_EXPORTS Scalar sqrSum(const GpuMat& src);
+CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf);
+CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf);
+
+//! finds global minimum and maximum array elements and returns their values
+CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat());
+CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
+
+//! finds global minimum and maximum array elements and returns their values with locations
+CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
+ const GpuMat& mask=GpuMat());
+CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
+ const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
+
+//! counts non-zero array elements
+CV_EXPORTS int countNonZero(const GpuMat& src);
+CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf);
+
+//! reduces a matrix to a vector
+CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null());
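
A fragment showing the buffered reduction overloads, where the caller owns one scratch GpuMat so repeated calls allocate nothing; reusing the same buffer for different reductions is an assumption here (each call is expected to resize it as needed).

    cv::gpu::GpuMat img;   // CV_8UC1
    cv::gpu::GpuMat buf;   // scratch buffer, reused from call to call
    double mn = 0, mx = 0;
    cv::gpu::minMax(img, &mn, &mx, cv::gpu::GpuMat(), buf);   // empty mask
    cv::Scalar total = cv::gpu::sum(img, buf);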
+
+
+///////////////////////////// Calibration 3D //////////////////////////////////
+
+CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+ GpuMat& dst, Stream& stream = Stream::Null());
+
+CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
+ const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
+ Stream& stream = Stream::Null());
+
+CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+ const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
+ int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
+ std::vector<int>* inliers=NULL);
+
+//////////////////////////////// Image Labeling ////////////////////////////////
+
+//! performs labeling via graph cuts of a 2D regular 4-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
+ GpuMat& buf, Stream& stream = Stream::Null());
+
+//! performs labeling via graph cuts of a 2D regular 8-connected graph.
+CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
+ GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight,
+ GpuMat& labels,
+ GpuMat& buf, Stream& stream = Stream::Null());
+
+//! computes a mask for Generalized Flood fill components labeling.
+CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
+
+//! performs connected components labeling.
+CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null());
+
+////////////////////////////////// Histograms //////////////////////////////////
+
+//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
+CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel);
+//! Calculates histogram with evenly distributed bins for single channel source.
+//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types.
+//! Output hist will have one row and histSize cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
+//! Calculates histogram with evenly distributed bins for four-channel source.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types.
+//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type.
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
+//! Calculates histogram with bins determined by levels array.
+//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types.
+//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null());
+//! Calculates histogram with bins determined by levels array.
+//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise.
+//! All channels of source are processed separately.
+//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types.
+//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type.
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
+CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null());
+
+//! Calculates histogram for 8u one channel image
+//! Output hist will have one row, 256 cols and CV_32SC1 type.
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
+
+//! normalizes the grayscale image brightness and contrast by normalizing its histogram
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
+CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());
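+// NOTE (editorial): an illustrative sketch of the histogram API above, assuming
+// `d_gray` is a CV_8UC1 GpuMat:
+//
+//     cv::gpu::GpuMat hist;
+//     cv::gpu::histEven(d_gray, hist, 256, 0, 256);   // hist: 1 x 256, CV_32SC1
+//
+//     cv::gpu::GpuMat equalized;
+//     cv::gpu::equalizeHist(d_gray, equalized);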
+
+//////////////////////////////// StereoBM_GPU ////////////////////////////////
+
+class CV_EXPORTS StereoBM_GPU
+{
+public:
+ enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+
+ enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+
+ //! the default constructor
+ StereoBM_GPU();
+ //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+ StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+
+ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+ //! Output disparity has CV_8U type.
+ void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+ //! Some heuristics that try to estimate
+ // if the current GPU will be faster than the CPU in this algorithm.
+ // It queries the currently active device.
+ static bool checkIfGpuCallReasonable();
+
+ int preset;
+ int ndisp;
+ int winSize;
+
+ // If avergeTexThreshold == 0 => post-processing is disabled.
+ // If avergeTexThreshold != 0, the disparity is set to 0 at each point (x,y) where, for the left image,
+ // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
+ // i.e. the input left image is low-textured.
+ float avergeTexThreshold;
+
+private:
+ GpuMat minSSD, leBuf, riBuf;
+};
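+// NOTE (editorial): a minimal sketch, assuming `d_left` and `d_right` are rectified
+// CV_8UC1 GpuMats of the same size:
+//
+//     cv::gpu::StereoBM_GPU bm(cv::gpu::StereoBM_GPU::BASIC_PRESET, 128, 19);
+//     cv::gpu::GpuMat d_disp;
+//     bm(d_left, d_right, d_disp);   // d_disp has CV_8U type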
+
+////////////////////////// StereoBeliefPropagation ///////////////////////////
+// "Efficient Belief Propagation for Early Vision"
+// P. Felzenszwalb and D. Huttenlocher
+
+class CV_EXPORTS StereoBeliefPropagation
+{
+public:
+ enum { DEFAULT_NDISP = 64 };
+ enum { DEFAULT_ITERS = 5 };
+ enum { DEFAULT_LEVELS = 5 };
+
+ static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
+
+ //! the default constructor
+ explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
+ int iters = DEFAULT_ITERS,
+ int levels = DEFAULT_LEVELS,
+ int msg_type = CV_32F);
+
+ //! the full constructor taking the number of disparities, number of BP iterations on each level,
+ //! number of levels, truncation of data cost, data weight,
+ //! truncation of discontinuity cost and discontinuity single jump
+ //! DataTerm = data_weight * min(fabs(I2-I1), max_data_term)
+ //! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term)
+ //! please see the paper for more details
+ StereoBeliefPropagation(int ndisp, int iters, int levels,
+ float max_data_term, float data_weight,
+ float max_disc_term, float disc_single_jump,
+ int msg_type = CV_32F);
+
+ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
+ //! if disparity is empty, the output type will be CV_16S, otherwise the output type will be disparity.type().
+ void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+
+ //! version for user specified data term
+ void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
+
+ int ndisp;
+
+ int iters;
+ int levels;
+
+ float max_data_term;
+ float data_weight;
+ float max_disc_term;
+ float disc_single_jump;
+
+ int msg_type;
+private:
+ GpuMat u, d, l, r, u2, d2, l2, r2;
+ std::vector<GpuMat> datas;
+ GpuMat out;
+};
+
+/////////////////////////// StereoConstantSpaceBP ///////////////////////////
+// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
+// Qingxiong Yang, Liang Wang, Narendra Ahuja
+// http://vision.ai.uiuc.edu/~qyang6/
+
+class CV_EXPORTS StereoConstantSpaceBP
+{
+public:
+ enum { DEFAULT_NDISP = 128 };
+ enum { DEFAULT_ITERS = 8 };
+ enum { DEFAULT_LEVELS = 4 };
+ enum { DEFAULT_NR_PLANE = 4 };
+
+ static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
+
+ //! the default constructor
+ explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
+ int iters = DEFAULT_ITERS,
+ int levels = DEFAULT_LEVELS,
+ int nr_plane = DEFAULT_NR_PLANE,
+ int msg_type = CV_32F);
+
+ //! the full constructor taking the number of disparities, number of BP iterations on each level,
+ //! number of levels, number of active disparities on the first level, truncation of data cost, data weight,
+ //! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
+ StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
+ float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
+ int min_disp_th = 0,
+ int msg_type = CV_32F);
+
+ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
+ //! if disparity is empty, the output type will be CV_16S, otherwise the output type will be disparity.type().
+ void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
+
+ int ndisp;
+
+ int iters;
+ int levels;
+
+ int nr_plane;
+
+ float max_data_term;
+ float data_weight;
+ float max_disc_term;
+ float disc_single_jump;
+
+ int min_disp_th;
+
+ int msg_type;
+
+ bool use_local_init_data_cost;
+private:
+ GpuMat messages_buffers;
+
+ GpuMat temp;
+ GpuMat out;
+};
+
+/////////////////////////// DisparityBilateralFilter ///////////////////////////
+// Disparity map refinement using joint bilateral filtering given a single color image.
+// Qingxiong Yang, Liang Wang, Narendra Ahuja
+// http://vision.ai.uiuc.edu/~qyang6/
+
+class CV_EXPORTS DisparityBilateralFilter
+{
+public:
+ enum { DEFAULT_NDISP = 64 };
+ enum { DEFAULT_RADIUS = 3 };
+ enum { DEFAULT_ITERS = 1 };
+
+ //! the default constructor
+ explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
+
+ //! the full constructor taking the number of disparities, filter radius,
+ //! number of iterations, truncation of data continuity, truncation of disparity continuity
+ //! and filter range sigma
+ DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range);
+
+ //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
+ //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
+ void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
+
+private:
+ int ndisp;
+ int radius;
+ int iters;
+
+ float edge_threshold;
+ float max_disc_threshold;
+ float sigma_range;
+
+ GpuMat table_color;
+ GpuMat table_space;
+};
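+// NOTE (editorial): an illustrative refinement sketch, assuming `d_disp` is a CV_8U
+// disparity map (e.g. from StereoBM_GPU above) and `d_left` is the CV_8UC3 left view:
+//
+//     cv::gpu::DisparityBilateralFilter dbf(128);
+//     cv::gpu::GpuMat d_refined;
+//     dbf(d_disp, d_left, d_refined);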
+
+
+//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+struct CV_EXPORTS HOGConfidence
+{
+ double scale;
+ std::vector<Point> locations;
+ std::vector<double> confidences;
+ std::vector<double> part_scores[4];
+};
+
+struct CV_EXPORTS HOGDescriptor
+{
+ enum { DEFAULT_WIN_SIGMA = -1 };
+ enum { DEFAULT_NLEVELS = 64 };
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+ HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
+ Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
+ int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
+ double threshold_L2hys=0.2, bool gamma_correction=true,
+ int nlevels=DEFAULT_NLEVELS);
+
+ size_t getDescriptorSize() const;
+ size_t getBlockHistogramSize() const;
+
+ void setSVMDetector(const std::vector<float>& detector);
+
+ static std::vector<float> getDefaultPeopleDetector();
+ static std::vector<float> getPeopleDetector48x96();
+ static std::vector<float> getPeopleDetector64x128();
+
+ void detect(const GpuMat& img, std::vector<Point>& found_locations,
+ double hit_threshold=0, Size win_stride=Size(),
+ Size padding=Size());
+
+ void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+ double hit_threshold=0, Size win_stride=Size(),
+ Size padding=Size(), double scale0=1.05,
+ int group_threshold=2);
+
+ void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
+ Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
+
+ void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+ double hit_threshold, Size win_stride, Size padding,
+ std::vector<HOGConfidence> &conf_out, int group_threshold);
+
+ void getDescriptors(const GpuMat& img, Size win_stride,
+ GpuMat& descriptors,
+ int descr_format=DESCR_FORMAT_COL_BY_COL);
+
+ Size win_size;
+ Size block_size;
+ Size block_stride;
+ Size cell_size;
+ int nbins;
+ double win_sigma;
+ double threshold_L2hys;
+ bool gamma_correction;
+ int nlevels;
+
+protected:
+ void computeBlockHistograms(const GpuMat& img);
+ void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+ double getWinSigma() const;
+ bool checkDetectorSize() const;
+
+ static int numPartsWithin(int size, int part_size, int stride);
+ static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+ // Coefficients of the separating plane
+ float free_coef;
+ GpuMat detector;
+
+ // Results of the last classification step
+ GpuMat labels, labels_buf;
+ Mat labels_host;
+
+ // Results of the last histogram evaluation step
+ GpuMat block_hists, block_hists_buf;
+
+ // Gradients computation results
+ GpuMat grad, qangle, grad_buf, qangle_buf;
+
+ // returns a subbuffer with the required size, reallocates the buffer if necessary.
+ static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
+ static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
+
+ std::vector<GpuMat> image_scales;
+};
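+// NOTE (editorial): a minimal pedestrian-detection sketch, assuming `d_img` is a
+// grayscale (CV_8UC1) GpuMat; the default 64x128 window matches the default detector:
+//
+//     cv::gpu::HOGDescriptor hog;
+//     hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+//     std::vector<cv::Rect> found;
+//     hog.detectMultiScale(d_img, found);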
+
+
+////////////////////////////////// BruteForceMatcher //////////////////////////////////
+
+class CV_EXPORTS BFMatcher_GPU
+{
+public:
+ explicit BFMatcher_GPU(int norm = cv::NORM_L2);
+
+ // Add descriptors to train descriptor collection
+ void add(const std::vector<GpuMat>& descCollection);
+
+ // Get train descriptors collection
+ const std::vector<GpuMat>& getTrainDescriptors() const;
+
+ // Clear train descriptors collection
+ void clear();
+
+ // Returns true if there are no train descriptors in the collection
+ bool empty() const;
+
+ // Returns true if the matcher supports masks in the match methods
+ bool isMaskSupported() const;
+
+ // Find one best match for each query descriptor
+ void matchSingle(const GpuMat& query, const GpuMat& train,
+ GpuMat& trainIdx, GpuMat& distance,
+ const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+ // Download trainIdx and distance and convert it to CPU vector with DMatch
+ static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+ // Convert trainIdx and distance to vector with DMatch
+ static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
+
+ // Find one best match for each query descriptor
+ void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
+
+ // Make gpu collection of trains and masks in suitable format for matchCollection function
+ void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+ // Find one best match from train collection for each query descriptor
+ void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
+ GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
+ const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
+
+ // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+ static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
+ // Convert trainIdx, imgIdx and distance to vector with DMatch
+ static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
+
+ // Find one best match from train collection for each query descriptor.
+ void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+
+ // Find k best matches for each query descriptor (in increasing order of distances)
+ void knnMatchSingle(const GpuMat& query, const GpuMat& train,
+ GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
+ const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+ // Convert trainIdx and distance to vector with DMatch
+ static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ void knnMatch(const GpuMat& query, const GpuMat& train,
+ std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
+ bool compactResult = false);
+
+ // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+ void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
+ GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
+ const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+ // Convert trainIdx and distance to vector with DMatch
+ static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
+ const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+ // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+ // Beware: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
+ // because it didn't have enough memory.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10);
+ // otherwise the user can pass his own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ // Matches are not sorted.
+ void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
+ GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
+ const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+
+ // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+ static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance
+ // (in increasing order of distances).
+ void radiusMatch(const GpuMat& query, const GpuMat& train,
+ std::vector< std::vector<DMatch> >& matches, float maxDistance,
+ const GpuMat& mask = GpuMat(), bool compactResult = false);
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+ // otherwise the user can pass his own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ // Matches are not sorted.
+ void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
+ const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
+
+ // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when mask is not empty. If compactResult is false matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+ // matches vector will not contain matches for fully masked out query descriptors.
+ static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+ static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
+ std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+
+ // Find best matches from train collection for each query descriptor which have distance less than
+ // maxDistance (in increasing order of distances).
+ void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
+ const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+
+ int norm;
+
+private:
+ std::vector<GpuMat> trainDescCollection;
+};
+
+template <class Distance>
+class CV_EXPORTS BruteForceMatcher_GPU;
+
+template <typename T>
+class CV_EXPORTS BruteForceMatcher_GPU< L1<T> > : public BFMatcher_GPU
+{
+public:
+ explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_L1) {}
+ explicit BruteForceMatcher_GPU(L1<T> /*d*/) : BFMatcher_GPU(NORM_L1) {}
+};
+template <typename T>
+class CV_EXPORTS BruteForceMatcher_GPU< L2<T> > : public BFMatcher_GPU
+{
+public:
+ explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_L2) {}
+ explicit BruteForceMatcher_GPU(L2<T> /*d*/) : BFMatcher_GPU(NORM_L2) {}
+};
+template <> class CV_EXPORTS BruteForceMatcher_GPU< Hamming > : public BFMatcher_GPU
+{
+public:
+ explicit BruteForceMatcher_GPU() : BFMatcher_GPU(NORM_HAMMING) {}
+ explicit BruteForceMatcher_GPU(Hamming /*d*/) : BFMatcher_GPU(NORM_HAMMING) {}
+};
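+// NOTE (editorial): a minimal matching sketch, assuming `d_query` and `d_train` are
+// CV_32F descriptor matrices already uploaded to the GPU:
+//
+//     cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+//     std::vector<cv::DMatch> matches;
+//     matcher.match(d_query, d_train, matches);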
+
+////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
+// The cascade classifier class for object detection: supports the old haar and new lbp xml formats, and nvbin for haar cascades only.
+class CV_EXPORTS CascadeClassifier_GPU
+{
+public:
+ CascadeClassifier_GPU();
+ CascadeClassifier_GPU(const std::string& filename);
+ ~CascadeClassifier_GPU();
+
+ bool empty() const;
+ bool load(const std::string& filename);
+ void release();
+
+ /* returns number of detected objects */
+ int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+ int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+
+ bool findLargestObject;
+ bool visualizeInPlace;
+
+ Size getClassifierSize() const;
+
+private:
+ struct CascadeClassifierImpl;
+ CascadeClassifierImpl* impl;
+ struct HaarCascade;
+ struct LbpCascade;
+ friend class CascadeClassifier_GPU_LBP;
+};
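+// NOTE (editorial): an illustrative sketch, assuming a haar cascade file is available
+// and `d_gray` is a CV_8UC1 GpuMat; detectMultiScale returns the detection count:
+//
+//     cv::gpu::CascadeClassifier_GPU cascade("haarcascade_frontalface_alt.xml");
+//     cv::gpu::GpuMat objbuf;
+//     int n = cascade.detectMultiScale(d_gray, objbuf);
+//     cv::Mat h_objs;
+//     objbuf.colRange(0, n).download(h_objs);   // h_objs.ptr<cv::Rect>() holds n rects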
+
- /*!
- * The class implements the following algorithm:
- * "ViBe: A universal background subtraction algorithm for video sequences"
- * O. Barnich and M. Van Droogenbroeck
- * IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
- */
- class CV_EXPORTS VIBE_GPU
- {
- public:
- //! the default constructor
- explicit VIBE_GPU(unsigned long rngSeed = 1234567);
-
- //! re-initialization method
- void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
-
- //! the update operator
- void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
-
- //! releases all inner buffers
- void release();
-
- int nbSamples; // number of samples per pixel
- int reqMatches; // #_min
- int radius; // R
- int subsamplingFactor; // amount of random subsampling
-
- private:
- Size frameSize_;
-
- unsigned long rngSeed_;
- GpuMat randStates_;
-
- GpuMat samples_;
- };
-
+////////////////////////////////// FAST //////////////////////////////////////////
+
+class CV_EXPORTS FAST_GPU
+{
+public:
+ enum
+ {
+ LOCATION_ROW = 0,
+ RESPONSE_ROW,
+ ROWS_COUNT
+ };
+
+ // all features have same size
+ static const int FEATURE_SIZE = 7;
+
+ explicit FAST_GPU(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
+
+ //! finds the keypoints using FAST detector
+ //! supports only CV_8UC1 images
+ void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
+ void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
+
+ //! download keypoints from device to host memory
+ static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
+
+ //! convert keypoints to KeyPoint vector
+ static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
+
+ //! releases temporary buffers' memory
+ void release();
+
+ bool nonmaxSupression;
+
+ int threshold;
+
+ //! max keypoints = keypointsRatio * img.size().area()
+ double keypointsRatio;
+
+ //! finds keypoints and computes their response if nonmaxSupression is true
+ //! returns the count of detected keypoints
+ int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
+
+ //! gets the final array of keypoints
+ //! performs nonmax suppression if needed
+ //! returns the final count of keypoints
+ int getKeyPoints(GpuMat& keypoints);
+
+private:
+ GpuMat kpLoc_;
+ int count_;
+
+ GpuMat score_;
+
+ GpuMat d_keypoints_;
+};
+
+////////////////////////////////// ORB //////////////////////////////////////////
+
+class CV_EXPORTS ORB_GPU
+{
+public:
+ enum
+ {
+ X_ROW = 0,
+ Y_ROW,
+ RESPONSE_ROW,
+ ANGLE_ROW,
+ OCTAVE_ROW,
+ SIZE_ROW,
+ ROWS_COUNT
+ };
+
+ enum
+ {
+ DEFAULT_FAST_THRESHOLD = 20
+ };
+
+ //! Constructor
+ explicit ORB_GPU(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
+ int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
+
+ //! Compute the ORB features on an image
+ //! image - the image to compute the features (supports only CV_8UC1 images)
+ //! mask - the mask to apply
+ //! keypoints - the resulting keypoints
+ void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
+ void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
+
+ //! Compute the ORB features and descriptors on an image
+ //! image - the image to compute the features (supports only CV_8UC1 images)
+ //! mask - the mask to apply
+ //! keypoints - the resulting keypoints
+ //! descriptors - descriptors array
+ void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
+ void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
+
+ //! download keypoints from device to host memory
+ static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
+ //! convert keypoints to KeyPoint vector
+ static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
+
+ //! returns the descriptor size in bytes
+ inline int descriptorSize() const { return kBytes; }
+
+ inline void setFastParams(int threshold, bool nonmaxSupression = true)
+ {
+ fastDetector_.threshold = threshold;
+ fastDetector_.nonmaxSupression = nonmaxSupression;
+ }
+
+ //! releases temporary buffers' memory
+ void release();
+
+ //! if true, image will be blurred before descriptors calculation
+ bool blurForDescriptor;
+
+private:
+ enum { kBytes = 32 };
+
+ void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
+
+ void computeKeyPointsPyramid();
+
+ void computeDescriptors(GpuMat& descriptors);
+
+ void mergeKeyPoints(GpuMat& keypoints);
+
+ int nFeatures_;
+ float scaleFactor_;
+ int nLevels_;
+ int edgeThreshold_;
+ int firstLevel_;
+ int WTA_K_;
+ int scoreType_;
+ int patchSize_;
+
+ // The number of desired features per scale
+ std::vector<size_t> n_features_per_level_;
+
+ // Points to compute BRIEF descriptors from
+ GpuMat pattern_;
+
+ std::vector<GpuMat> imagePyr_;
+ std::vector<GpuMat> maskPyr_;
+
+ GpuMat buf_;
+
+ std::vector<GpuMat> keyPointsPyr_;
+ std::vector<int> keyPointsCount_;
+
+ FAST_GPU fastDetector_;
+
+ Ptr<FilterEngine_GPU> blurFilter;
+
+ GpuMat d_keypoints_;
+};
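+// NOTE (editorial): a minimal sketch, assuming `d_img` is a CV_8UC1 GpuMat; an empty
+// GpuMat is passed as the mask:
+//
+//     cv::gpu::ORB_GPU orb(4000);
+//     std::vector<cv::KeyPoint> keypoints;
+//     cv::gpu::GpuMat descriptors;
+//     orb(d_img, cv::gpu::GpuMat(), keypoints, descriptors);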
+
+////////////////////////////////// Optical Flow //////////////////////////////////////////
+
+class CV_EXPORTS BroxOpticalFlow
+{
+public:
+ BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
+ alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
+ inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
+ {
+ }
+
+ //! Compute optical flow
+ //! frame0 - source frame (supports only CV_32FC1 type)
+ //! frame1 - frame to track (with the same size and type as frame0)
+ //! u - flow horizontal component (along x axis)
+ //! v - flow vertical component (along y axis)
+ void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
+
+ //! flow smoothness
+ float alpha;
+
+ //! gradient constancy importance
+ float gamma;
+
+ //! pyramid scale factor
+ float scale_factor;
+
+ //! number of lagged non-linearity iterations (inner loop)
+ int inner_iterations;
+
+ //! number of warping iterations (number of pyramid levels)
+ int outer_iterations;
+
+ //! number of linear system solver iterations
+ int solver_iterations;
+
+ GpuMat buf;
+};
+
+class CV_EXPORTS GoodFeaturesToTrackDetector_GPU
+{
+public:
+ explicit GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+
+ //! returns a 1-row matrix of CV_32FC2 type
+ void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat());
+
+ int maxCorners;
+ double qualityLevel;
+ double minDistance;
+
+ int blockSize;
+ bool useHarrisDetector;
+ double harrisK;
+
+ void releaseMemory()
+ {
+ Dx_.release();
+ Dy_.release();
+ buf_.release();
+ eig_.release();
+ minMaxbuf_.release();
+ tmpCorners_.release();
+ }
+
+private:
+ GpuMat Dx_;
+ GpuMat Dy_;
+ GpuMat buf_;
+ GpuMat eig_;
+ GpuMat minMaxbuf_;
+ GpuMat tmpCorners_;
+};
+
+inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_,
+ int blockSize_, bool useHarrisDetector_, double harrisK_)
+{
+ maxCorners = maxCorners_;
+ qualityLevel = qualityLevel_;
+ minDistance = minDistance_;
+ blockSize = blockSize_;
+ useHarrisDetector = useHarrisDetector_;
+ harrisK = harrisK_;
+}
+
+
+class CV_EXPORTS PyrLKOpticalFlow
+{
+public:
+ PyrLKOpticalFlow();
+
+ void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
+ GpuMat& status, GpuMat* err = 0);
+
+ void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
+
+ void releaseMemory();
+
+ Size winSize;
+ int maxLevel;
+ int iters;
+ bool useInitialFlow;
+
+private:
+ std::vector<GpuMat> prevPyr_;
+ std::vector<GpuMat> nextPyr_;
+
+ GpuMat buf_;
+
+ GpuMat uPyr_[2];
+ GpuMat vPyr_[2];
+};
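+// NOTE (editorial): an illustrative sparse-flow sketch combining the two classes above,
+// assuming `d_prev` and `d_next` are consecutive CV_8UC1 frames on the GPU:
+//
+//     cv::gpu::GoodFeaturesToTrackDetector_GPU detector(1000, 0.01, 8.0);
+//     cv::gpu::GpuMat d_prevPts;
+//     detector(d_prev, d_prevPts);   // 1 x N, CV_32FC2
+//
+//     cv::gpu::PyrLKOpticalFlow lk;
+//     cv::gpu::GpuMat d_nextPts, d_status;
+//     lk.sparse(d_prev, d_next, d_prevPts, d_nextPts, d_status);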
+
+
+class CV_EXPORTS FarnebackOpticalFlow
+{
+public:
+ FarnebackOpticalFlow()
+ {
+ numLevels = 5;
+ pyrScale = 0.5;
+ fastPyramids = false;
+ winSize = 13;
+ numIters = 10;
+ polyN = 5;
+ polySigma = 1.1;
+ flags = 0;
+ }
+
+ int numLevels;
+ double pyrScale;
+ bool fastPyramids;
+ int winSize;
+ int numIters;
+ int polyN;
+ double polySigma;
+ int flags;
+
+ void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
+
+ void releaseMemory()
+ {
+ frames_[0].release();
+ frames_[1].release();
+ pyrLevel_[0].release();
+ pyrLevel_[1].release();
+ M_.release();
+ bufM_.release();
+ R_[0].release();
+ R_[1].release();
+ blurredFrame_[0].release();
+ blurredFrame_[1].release();
+ pyramid0_.clear();
+ pyramid1_.clear();
+ }
+
+private:
+ void prepareGaussian(
+ int n, double sigma, float *g, float *xg, float *xxg,
+ double &ig11, double &ig03, double &ig33, double &ig55);
+
+ void setPolynomialExpansionConsts(int n, double sigma);
+
+ void updateFlow_boxFilter(
+ const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
+ GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+ void updateFlow_gaussianBlur(
+ const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
+ GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
+
+ GpuMat frames_[2];
+ GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+ std::vector<GpuMat> pyramid0_, pyramid1_;
+};
+
+
+// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+//
+// see reference:
+// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+class CV_EXPORTS OpticalFlowDual_TVL1_GPU
+{
+public:
+ OpticalFlowDual_TVL1_GPU();
+
+ void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
+
+ void collectGarbage();
+
+ /**
+ * Time step of the numerical scheme.
+ */
+ double tau;
+
+ /**
+ * Weight parameter for the data term, attachment parameter.
+ * This is the most relevant parameter, which determines the smoothness of the output.
+ * The smaller this parameter is, the smoother the solutions we obtain.
+ * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+ */
+ double lambda;
+
+ /**
+ * Weight parameter for (u - v)^2, tightness parameter.
+ * It serves as a link between the attachment and the regularization terms.
+ * In theory, it should have a small value in order to maintain both parts in correspondence.
+ * The method is stable for a large range of values of this parameter.
+ */
+ double theta;
+
+ /**
+ * Number of scales used to create the pyramid of images.
+ */
+ int nscales;
+
+ /**
+ * Number of warpings per scale.
+ * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+ * This is a parameter that assures the stability of the method.
+ * It also affects the running time, so it is a compromise between speed and accuracy.
+ */
+ int warps;
+
+ /**
+ * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+ * A small value will yield more accurate solutions at the expense of a slower convergence.
+ */
+ double epsilon;
+
+ /**
+ * Stopping criterion iterations number used in the numerical scheme.
+ */
+ int iterations;
+
+ bool useInitialFlow;
+
+private:
+ void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2);
+
+ std::vector<GpuMat> I0s;
+ std::vector<GpuMat> I1s;
+ std::vector<GpuMat> u1s;
+ std::vector<GpuMat> u2s;
+
+ GpuMat I1x_buf;
+ GpuMat I1y_buf;
+
+ GpuMat I1w_buf;
+ GpuMat I1wx_buf;
+ GpuMat I1wy_buf;
+
+ GpuMat grad_buf;
+ GpuMat rho_c_buf;
+
+ GpuMat p11_buf;
+ GpuMat p12_buf;
+ GpuMat p21_buf;
+ GpuMat p22_buf;
+
+ GpuMat diff_buf;
+ GpuMat norm_buf;
+};
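+// NOTE (editorial): a minimal sketch, assuming `d_frame0` and `d_frame1` are grayscale
+// GpuMats of the same size:
+//
+//     cv::gpu::OpticalFlowDual_TVL1_GPU tvl1;
+//     cv::gpu::GpuMat d_flowx, d_flowy;
+//     tvl1(d_frame0, d_frame1, d_flowx, d_flowy);   // CV_32FC1 flow components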
+
+
+//! Calculates optical flow for 2 images using the block matching algorithm
+CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr,
+ Size block_size, Size shift_size, Size max_range, bool use_previous,
+ GpuMat& velx, GpuMat& vely, GpuMat& buf,
+ Stream& stream = Stream::Null());
+
+class CV_EXPORTS FastOpticalFlowBM
+{
+public:
+ void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null());
+
+private:
+ GpuMat buffer;
+ GpuMat extended_I0;
+ GpuMat extended_I1;
+};
+
+
+//! Interpolate frames (images) using provided optical flow (displacement field).
+//! frame0 - frame 0 (32-bit floating point images, single channel)
+//! frame1 - frame 1 (the same type and size)
+//! fu - forward horizontal displacement
+//! fv - forward vertical displacement
+//! bu - backward horizontal displacement
+//! bv - backward vertical displacement
+//! pos - new frame position
+//! newFrame - new frame
+//! buf - temporary buffer; will have width x 6*height size, CV_32FC1 type and contain 6 GpuMats:
+//! occlusion masks 0, occlusion masks 1,
+//! interpolated forward flow 0, interpolated forward flow 1,
+//! interpolated backward flow 0, interpolated backward flow 1
+//!
+CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
+ const GpuMat& fu, const GpuMat& fv,
+ const GpuMat& bu, const GpuMat& bv,
+ float pos, GpuMat& newFrame, GpuMat& buf,
+ Stream& stream = Stream::Null());
+
+CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
+
+
+//////////////////////// Background/foreground segmentation ////////////////////////
+
+// Foreground Object Detection from Videos Containing Complex Background.
+// Liyuan Li, Weimin Huang, Irene Y.H. Gu, and Qi Tian.
+// ACM MM2003 9p
+class CV_EXPORTS FGDStatModel
+{
+public:
+ struct CV_EXPORTS Params
+ {
+ int Lc; // Quantized levels per 'color' component. Power of two, typically 32, 64 or 128.
+ int N1c; // Number of color vectors used to model normal background color variation at a given pixel.
+ int N2c; // Number of color vectors retained at given pixel. Must be > N1c, typically ~ 5/3 of N1c.
+ // Used to allow the first N1c vectors to adapt over time to changing background.
+
+ int Lcc; // Quantized levels per 'color co-occurrence' component. Power of two, typically 16, 32 or 64.
+ int N1cc; // Number of color co-occurrence vectors used to model normal background color variation at a given pixel.
+ int N2cc; // Number of color co-occurrence vectors retained at given pixel. Must be > N1cc, typically ~ 5/3 of N1cc.
+ // Used to allow the first N1cc vectors to adapt over time to changing background.
+
+ bool is_obj_without_holes; // If TRUE we ignore holes within foreground blobs. Defaults to TRUE.
+ int perform_morphing; // Number of erode-dilate-erode foreground-blob cleanup iterations.
+ // These erase one-pixel junk blobs and merge almost-touching blobs. Default value is 1.
+
+ float alpha1; // How quickly we forget old background pixel values seen. Typically set to 0.1.
+ float alpha2; // "Controls speed of feature learning". Depends on T. Typical value circa 0.005.
+ float alpha3; // Alternate to alpha2, used (e.g.) for quicker initial convergence. Typical value 0.1.
+
+ float delta; // Affects color and color co-occurrence quantization, typically set to 2.
+ float T; // A percentage value which determines when new features can be recognized as new background. (Typically 0.9).
+ float minArea; // Discard foreground blobs whose bounding box is smaller than this threshold.
+
+ // default Params
+ Params();
+ };
+
+ // out_cn - channels count in output result (can be 3 or 4)
+ // 4 channels require more memory, but are a bit faster
+ explicit FGDStatModel(int out_cn = 3);
+ explicit FGDStatModel(const cv::gpu::GpuMat& firstFrame, const Params& params = Params(), int out_cn = 3);
+
+ ~FGDStatModel();
+
+ void create(const cv::gpu::GpuMat& firstFrame, const Params& params = Params());
+ void release();
+
+ int update(const cv::gpu::GpuMat& curFrame);
+
+ //8UC3 or 8UC4 reference background image
+ cv::gpu::GpuMat background;
+
+ //8UC1 foreground image
+ cv::gpu::GpuMat foreground;
+
+ std::vector< std::vector<cv::Point> > foreground_regions;
+
+private:
+ FGDStatModel(const FGDStatModel&);
+ FGDStatModel& operator=(const FGDStatModel&);
+
+ class Impl;
+ std::auto_ptr<Impl> impl_;
+};
+
+/*!
+ Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+
+ The class implements the following algorithm:
+ "An improved adaptive background mixture model for real-time tracking with shadow detection"
+ P. KadewTraKuPong and R. Bowden,
+ Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+ http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+*/
+class CV_EXPORTS MOG_GPU
+{
+public:
+ //! the default constructor
+ MOG_GPU(int nmixtures = -1);
+
+ //! re-initialization method
+ void initialize(Size frameSize, int frameType);
+
+ //! the update operator
+ void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
+
+ //! computes a background image which is the mean of all background Gaussians
+ void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
+
+ //! releases all inner buffers
+ void release();
+
+ int history;
+ float varThreshold;
+ float backgroundRatio;
+ float noiseSigma;
+
+private:
+ int nmixtures_;
+
+ Size frameSize_;
+ int frameType_;
+ int nframes_;
+
+ GpuMat weight_;
+ GpuMat sortKey_;
+ GpuMat mean_;
+ GpuMat var_;
+};
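+// NOTE (editorial): an illustrative per-frame loop, assuming each new video frame is
+// uploaded into `d_frame` before the call:
+//
+//     cv::gpu::MOG_GPU mog;
+//     cv::gpu::GpuMat d_fgmask;
+//     for (;;)
+//     {
+//         // ... upload the next frame into d_frame ...
+//         mog(d_frame, d_fgmask, 0.01f);   // d_fgmask is the 8UC1 foreground mask
+//     }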
+
+/*!
+ The class implements the following algorithm:
+ "Improved adaptive Gausian mixture model for background subtraction"
+ Z.Zivkovic
+ International Conference Pattern Recognition, UK, August, 2004.
+ http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+*/
+class CV_EXPORTS MOG2_GPU
+{
+public:
+ //! the default constructor
+ MOG2_GPU(int nmixtures = -1);
+
+ //! re-initialization method
+ void initialize(Size frameSize, int frameType);
+
+ //! the update operator
+ void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
+
+ //! computes a background image which is the mean of all background Gaussians
+ void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
+
+ //! releases all inner buffers
+ void release();
+
+ // parameters
+ // you should call initialize after parameter changes
+
+ int history;
+
+ //! here it is the maximum allowed number of mixture components.
+ //! Actual number is determined dynamically per pixel
+ float varThreshold;
+ // threshold on the squared Mahalanobis distance to decide if it is well described
+ // by the background model or not. Related to Cthr from the paper.
+ // This does not influence the update of the background. A typical value could be 4 sigma
+ // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+ /////////////////////////
+ // less important parameters - things you might change but be careful
+ ////////////////////////
+
+ float backgroundRatio;
+ // corresponds to fTB=1-cf from the paper
+ // TB - threshold when the component becomes significant enough to be included into
+ // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+ // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+ // it is considered foreground
+ // float noiseSigma;
+ float varThresholdGen;
+
+ //corresponds to Tg - threshold on the squared Mahalanobis distance to decide
+ //when a sample is close to the existing components. If it is not close
+ //to any, a new component will be generated. I use 3 sigma => Tg=3*3=9.
+ //Smaller Tg leads to more generated components, and a higher Tg might
+ //lead to a small number of components, but they can grow too large.
+ float fVarInit;
+ float fVarMin;
+ float fVarMax;
+
+ //initial variance for the newly generated components.
+ //It will influence the speed of adaptation. A good guess should be made.
+ //A simple way is to estimate the typical standard deviation from the images.
+ //I used here 10 as a reasonable value
+ // min and max can be used to further control the variance
+ float fCT; //CT - complexity reduction prior
+ //this is related to the number of samples needed to accept that a component
+ //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+ //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+ //shadow detection parameters
+ bool bShadowDetection; //default 1 - do shadow detection
+ unsigned char nShadowDetection; //value inserted as the shadow detection result; 127 by default
+ float fTau;
+ // Tau - shadow threshold. A shadow is detected if the pixel is a darker
+ //version of the background. Tau is a threshold on how much darker the shadow can be.
+ //Tau = 0.5 means that if a pixel is more than 2 times darker, then it is not shadow.
+ //See: Prati, Mikic, Trivedi, Cucchiara, "Detecting Moving Shadows...", IEEE PAMI, 2003.
+
+private:
+ int nmixtures_;
+
+ Size frameSize_;
+ int frameType_;
+ int nframes_;
+
+ GpuMat weight_;
+ GpuMat variance_;
+ GpuMat mean_;
+
+ GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
+};
+
+/**
+ * Background Subtractor module. Takes a series of images and returns a sequence of mask (8UC1)
+ * images of the same size, where 255 indicates Foreground and 0 represents Background.
+ * This class implements an algorithm described in "Visual Tracking of Human Visitors under
+ * Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere,
+ * A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012.
+ */
+class CV_EXPORTS GMG_GPU
+{
+public:
+ GMG_GPU();
+
+ /**
+ * Validate parameters and set up data structures for appropriate frame size.
+ * @param frameSize Input frame size
+ * @param min Minimum value taken on by pixels in image sequence. Usually 0
+ * @param max Maximum value taken on by pixels in image sequence. e.g. 1.0 or 255
+ */
+ void initialize(Size frameSize, float min = 0.0f, float max = 255.0f);
+
+ /**
+ * Performs single-frame background subtraction and builds up a statistical background image
+ * model.
+ * @param frame Input frame
+ * @param fgmask Output mask image representing foreground and background pixels
+ * @param stream Stream for the asynchronous version
+ */
+ void operator ()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
+
+ //! Releases all inner buffers
+ void release();
+
+ //! Total number of distinct colors to maintain in histogram.
+ int maxFeatures;
+
+ //! Set between 0.0 and 1.0, determines how quickly features are "forgotten" from histograms.
+ float learningRate;
+
+ //! Number of frames of video to use to initialize histograms.
+ int numInitializationFrames;
+
+ //! Number of discrete levels in each channel to be used in histograms.
+ int quantizationLevels;
+
+ //! Prior probability that any given pixel is a background pixel. A sensitivity parameter.
+ float backgroundPrior;
+
+ //! Value above which pixel is determined to be FG.
+ float decisionThreshold;
+
+ //! Smoothing radius, in pixels, for cleaning up FG image.
+ int smoothingRadius;
+
+ //! Perform background model update.
+ bool updateBackgroundModel;
+
+private:
+ float maxVal_, minVal_;
+
+ Size frameSize_;
+
+ int frameNum_;
+
+ GpuMat nfeatures_;
+ GpuMat colors_;
+ GpuMat weights_;
+
+ Ptr<FilterEngine_GPU> boxFilter_;
+ GpuMat buf_;
+};
+
+////////////////////////////////// Video Encoding //////////////////////////////////
+
+// Works only under Windows
+// Supports only the H264 video codec and AVI files
+class CV_EXPORTS VideoWriter_GPU
+{
+public:
+ struct EncoderParams;
+
+ // Callbacks for the video encoder; use them if you want to work with the raw video stream
+ class EncoderCallBack;
+
+ enum SurfaceFormat
+ {
+ SF_UYVY = 0,
+ SF_YUY2,
+ SF_YV12,
+ SF_NV12,
+ SF_IYUV,
+ SF_BGR,
+ SF_GRAY = SF_BGR
+ };
+
+ VideoWriter_GPU();
+ VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+ VideoWriter_GPU(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+ VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+ VideoWriter_GPU(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+ ~VideoWriter_GPU();
+
+ // all methods throw cv::Exception if an error occurs
+ void open(const std::string& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+ void open(const std::string& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+ void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR);
+ void open(const cv::Ptr<EncoderCallBack>& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR);
+
+ bool isOpened() const;
+ void close();
+
+ void write(const cv::gpu::GpuMat& image, bool lastFrame = false);
+
+ struct CV_EXPORTS EncoderParams
+ {
+ int P_Interval; // NVVE_P_INTERVAL,
+ int IDR_Period; // NVVE_IDR_PERIOD,
+ int DynamicGOP; // NVVE_DYNAMIC_GOP,
+ int RCType; // NVVE_RC_TYPE,
+ int AvgBitrate; // NVVE_AVG_BITRATE,
+ int PeakBitrate; // NVVE_PEAK_BITRATE,
+ int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA,
+ int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P,
+ int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B,
+ int DeblockMode; // NVVE_DEBLOCK_MODE,
+ int ProfileLevel; // NVVE_PROFILE_LEVEL,
+ int ForceIntra; // NVVE_FORCE_INTRA,
+ int ForceIDR; // NVVE_FORCE_IDR,
+ int ClearStat; // NVVE_CLEAR_STAT,
+ int DIMode; // NVVE_SET_DEINTERLACE,
+ int Presets; // NVVE_PRESETS,
+ int DisableCabac; // NVVE_DISABLE_CABAC,
+ int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE
+ int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS
+
+ EncoderParams();
+ explicit EncoderParams(const std::string& configFile);
+
+ void load(const std::string& configFile);
+ void save(const std::string& configFile) const;
+ };
+
+ EncoderParams getParams() const;
+
+ class CV_EXPORTS EncoderCallBack
+ {
+ public:
+ enum PicType
+ {
+ IFRAME = 1,
+ PFRAME = 2,
+ BFRAME = 3
+ };
+
+ virtual ~EncoderCallBack() {}
+
+ // callback function to signal the start of bitstream that is to be encoded
+ // must return pointer to buffer
+ virtual uchar* acquireBitStream(int* bufferSize) = 0;
+
+ // callback function to signal that the encoded bitstream is ready to be written to file
+ virtual void releaseBitStream(unsigned char* data, int size) = 0;
+
+ // callback function to signal that the encoding operation on the frame has started
+ virtual void onBeginFrame(int frameNumber, PicType picType) = 0;
+
+ // callback function to signal that the encoding operation on the frame has finished
+ virtual void onEndFrame(int frameNumber, PicType picType) = 0;
+ };
+
+private:
+ VideoWriter_GPU(const VideoWriter_GPU&);
+ VideoWriter_GPU& operator=(const VideoWriter_GPU&);
+
+ class Impl;
+ std::auto_ptr<Impl> impl_;
+};
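+// NOTE (editorial): a minimal encoding sketch (Windows-only, per the note above), assuming
+// `d_frame` matches the chosen surface format (a 4-channel BGRA image for the SF_BGR default):
+//
+//     cv::gpu::VideoWriter_GPU writer("out.avi", cv::Size(1280, 720), 25.0);
+//     writer.write(d_frame);
+//     writer.close();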
+
+
+////////////////////////////////// Video Decoding //////////////////////////////////////////
+
+namespace detail
+{
+ class FrameQueue;
+ class VideoParser;
+}
+
+class CV_EXPORTS VideoReader_GPU
+{
+public:
+ enum Codec
+ {
+ MPEG1 = 0,
+ MPEG2,
+ MPEG4,
+ VC1,
+ H264,
+ JPEG,
+ H264_SVC,
+ H264_MVC,
+
+ Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0)
+ Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0)
+ Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0)
+ Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2)
+ Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')), // UYVY (4:2:2)
+ };
+
+ enum ChromaFormat
+ {
+ Monochrome=0,
+ YUV420,
+ YUV422,
+ YUV444,
+ };
+
+ struct FormatInfo
+ {
+ Codec codec;
+ ChromaFormat chromaFormat;
+ int width;
+ int height;
+ };
+
+ class VideoSource;
+
+ VideoReader_GPU();
+ explicit VideoReader_GPU(const std::string& filename);
+ explicit VideoReader_GPU(const cv::Ptr<VideoSource>& source);
+
+ ~VideoReader_GPU();
+
+ void open(const std::string& filename);
+ void open(const cv::Ptr<VideoSource>& source);
+ bool isOpened() const;
+
+ void close();
+
+ bool read(GpuMat& image);
+
+ FormatInfo format() const;
+ void dumpFormat(std::ostream& st);
+
+ class CV_EXPORTS VideoSource
+ {
+ public:
+ VideoSource() : frameQueue_(0), videoParser_(0) {}
+ virtual ~VideoSource() {}
+
+ virtual FormatInfo format() const = 0;
+ virtual void start() = 0;
+ virtual void stop() = 0;
+ virtual bool isStarted() const = 0;
+ virtual bool hasError() const = 0;
+
+ void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; }
+ void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; }
+
+ protected:
+ bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false);
+
+ private:
+ VideoSource(const VideoSource&);
+ VideoSource& operator =(const VideoSource&);
+
+ detail::FrameQueue* frameQueue_;
+ detail::VideoParser* videoParser_;
+ };
+
+private:
+ VideoReader_GPU(const VideoReader_GPU&);
+ VideoReader_GPU& operator =(const VideoReader_GPU&);
+
+ class Impl;
+ std::auto_ptr<Impl> impl_;
+};
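+// NOTE (editorial): a minimal decoding sketch, assuming "in.avi" is encoded with one of
+// the codecs listed above:
+//
+//     cv::gpu::VideoReader_GPU reader("in.avi");
+//     cv::gpu::GpuMat d_frame;
+//     while (reader.read(d_frame))
+//     {
+//         // ... process d_frame ...
+//     }
+//     reader.close();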
+
+//! removes points (CV_32FC2, single row matrix) with zero mask value
+CV_EXPORTS void compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask);
+
+CV_EXPORTS void calcWobbleSuppressionMaps(
+ int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
+ GpuMat &mapx, GpuMat &mapy);
+
+} // namespace gpu
+
+} // namespace cv
+
+#endif /* __OPENCV_GPU_HPP__ */
#include <cuda_runtime.h>
#endif
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts.hpp"
+ #include "opencv2/ts/gpu_perf.hpp"
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/gpu/gpu.hpp"
-#include "opencv2/calib3d/calib3d.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/video/video.hpp"
-#include "opencv2/legacy/legacy.hpp"
-#include "opencv2/photo/photo.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/gpu.hpp"
+#include "opencv2/calib3d.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/video.hpp"
- #include "opencv2/nonfree.hpp"
+#include "opencv2/legacy.hpp"
+#include "opencv2/photo.hpp"
- #include "utility.hpp"
-
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#ifdef HAVE_CVCONFIG_H
#include "cvconfig.h"
#endif
-#include "opencv2/core/core.hpp"
-#include "opencv2/gpu/gpu.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/video/video.hpp"
-#include "opencv2/legacy/legacy.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/gpu.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/legacy.hpp"
+#include "opencv2/ts.hpp"
-
- static void printOsInfo()
- {
- #if defined _WIN32
- # if defined _WIN64
- printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout);
- # else
- printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout);
- # endif
- #elif defined linux
- # if defined _LP64
- printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout);
- # else
- printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout);
- # endif
- #elif defined __APPLE__
- # if defined _LP64
- printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout);
- # else
- printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout);
- # endif
- #endif
- }
-
- static void printCudaInfo()
- {
- const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
-
- printf("[----------]\n"); fflush(stdout);
- printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout);
- printf("[----------]\n"); fflush(stdout);
-
- for (int i = 0; i < deviceCount; ++i)
- {
- cv::gpu::DeviceInfo info(i);
-
- printf("[----------]\n"); fflush(stdout);
- printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout);
- printf("[ ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout);
- printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout);
- printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout);
- printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout);
- if (!info.isCompatible())
- printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
- printf("[----------]\n"); fflush(stdout);
- }
- }
+ #include "opencv2/ts/gpu_perf.hpp"
int main(int argc, char* argv[])
{
return 0;
}
- printOsInfo();
printCudaInfo();
- if (cmd.get<bool>("info"))
+ if (cmd.has("info"))
{
return 0;
}
#include <cuda.h>
#include <cuda_runtime.h>
- #include "opencv2/ts/ts.hpp"
- #include "opencv2/ts/ts_perf.hpp"
+ #include "opencv2/core.hpp"
+ #include "opencv2/core/opengl.hpp"
+ #include "opencv2/highgui.hpp"
+ #include "opencv2/calib3d.hpp"
+ #include "opencv2/imgproc.hpp"
+ #include "opencv2/video.hpp"
+ #include "opencv2/ts.hpp"
+ #include "opencv2/ts/gpu_test.hpp"
-
- #include "opencv2/core/core.hpp"
- #include "opencv2/core/opengl_interop.hpp"
- #include "opencv2/highgui/highgui.hpp"
- #include "opencv2/calib3d/calib3d.hpp"
- #include "opencv2/imgproc/imgproc.hpp"
- #include "opencv2/video/video.hpp"
- #include "opencv2/gpu/gpu.hpp"
- #include "opencv2/legacy/legacy.hpp"
+ #include "opencv2/gpu.hpp"
- #include "opencv2/nonfree.hpp"
+ #include "opencv2/legacy.hpp"
- #include "utility.hpp"
#include "interpolation.hpp"
#include "main_test_nvidia.h"
#endif
#!/usr/bin/env python
import os, sys, re, string, fnmatch
-allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "softcascade"]
-allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "superres"]
+allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "softcascade", "superres"]
verbose = False
show_warnings = True
show_errors = True
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_ML_HPP__
+#define __OPENCV_ML_HPP__
+
+#include "opencv2/core.hpp"
+#include <limits.h>
+
+#ifdef __cplusplus
+
+#include <map>
+#include <string>
+#include <iostream>
+
+// Apple defines a check() macro somewhere in the debug headers
+// that interferes with a method definition in this header
+#undef check
+
+/****************************************************************************************\
+* Main struct definitions *
+\****************************************************************************************/
+
+/* log(2*PI) */
+#define CV_LOG2PI (1.8378770664093454835606594728112)
+
+/* columns of <trainData> matrix are training samples */
+#define CV_COL_SAMPLE 0
+
+/* rows of <trainData> matrix are training samples */
+#define CV_ROW_SAMPLE 1
+
+#define CV_IS_ROW_SAMPLE(flags) ((flags) & CV_ROW_SAMPLE)
+
+struct CvVectors
+{
+ int type;
+ int dims, count;
+ CvVectors* next;
+ union
+ {
+ uchar** ptr;
+ float** fl;
+ double** db;
+ } data;
+};
+
+#if 0
+/* A structure, representing the lattice range of statmodel parameters.
+ It is used for optimizing statmodel parameters by the cross-validation method.
+ The lattice is logarithmic, so <step> must be greater than 1. */
+typedef struct CvParamLattice
+{
+ double min_val;
+ double max_val;
+ double step;
+}
+CvParamLattice;
+
+CV_INLINE CvParamLattice cvParamLattice( double min_val, double max_val,
+ double log_step )
+{
+ CvParamLattice pl;
+ pl.min_val = MIN( min_val, max_val );
+ pl.max_val = MAX( min_val, max_val );
+ pl.step = MAX( log_step, 1. );
+ return pl;
+}
+
+CV_INLINE CvParamLattice cvDefaultParamLattice( void )
+{
+ CvParamLattice pl = {0,0,0};
+ return pl;
+}
+#endif
+
+/* Variable type */
+#define CV_VAR_NUMERICAL 0
+#define CV_VAR_ORDERED 0
+#define CV_VAR_CATEGORICAL 1
+
+#define CV_TYPE_NAME_ML_SVM "opencv-ml-svm"
+#define CV_TYPE_NAME_ML_KNN "opencv-ml-knn"
+#define CV_TYPE_NAME_ML_NBAYES "opencv-ml-bayesian"
+#define CV_TYPE_NAME_ML_EM "opencv-ml-em"
+#define CV_TYPE_NAME_ML_BOOSTING "opencv-ml-boost-tree"
+#define CV_TYPE_NAME_ML_TREE "opencv-ml-tree"
+#define CV_TYPE_NAME_ML_ANN_MLP "opencv-ml-ann-mlp"
+#define CV_TYPE_NAME_ML_CNN "opencv-ml-cnn"
+#define CV_TYPE_NAME_ML_RTREES "opencv-ml-random-trees"
+#define CV_TYPE_NAME_ML_ERTREES "opencv-ml-extremely-randomized-trees"
+#define CV_TYPE_NAME_ML_GBT "opencv-ml-gradient-boosting-trees"
+
+#define CV_TRAIN_ERROR 0
+#define CV_TEST_ERROR 1
+
+class CV_EXPORTS_W CvStatModel
+{
+public:
+ CvStatModel();
+ virtual ~CvStatModel();
+
+ virtual void clear();
+
+ CV_WRAP virtual void save( const char* filename, const char* name=0 ) const;
+ CV_WRAP virtual void load( const char* filename, const char* name=0 );
+
+ virtual void write( CvFileStorage* storage, const char* name ) const;
+ virtual void read( CvFileStorage* storage, CvFileNode* node );
+
+protected:
+ const char* default_model_name;
+};
+
+/****************************************************************************************\
+* Normal Bayes Classifier *
+\****************************************************************************************/
+
+/* The structure, representing the grid range of statmodel parameters.
+ It is used for optimizing statmodel accuracy by varying model parameters,
+ the accuracy estimate being computed by cross-validation.
+ The grid is logarithmic, so <step> must be greater than 1. */
+
+class CvMLData;
+
+struct CV_EXPORTS_W_MAP CvParamGrid
+{
+ // SVM params type
+ enum { SVM_C=0, SVM_GAMMA=1, SVM_P=2, SVM_NU=3, SVM_COEF=4, SVM_DEGREE=5 };
+
+ CvParamGrid()
+ {
+ min_val = max_val = step = 0;
+ }
+
+ CvParamGrid( double min_val, double max_val, double log_step );
+ //CvParamGrid( int param_id );
+ bool check() const;
+
+ CV_PROP_RW double min_val;
+ CV_PROP_RW double max_val;
+ CV_PROP_RW double step;
+};
+
+inline CvParamGrid::CvParamGrid( double _min_val, double _max_val, double _log_step )
+{
+ min_val = _min_val;
+ max_val = _max_val;
+ step = _log_step;
+}
+
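+// A sketch of how such a grid is consumed (illustrative values): the
+// candidates are min_val, min_val*step, min_val*step^2, ... while they stay
+// below max_val, which is why check() requires step > 1.
+//
+//   CvParamGrid grid(0.1, 100, 10);   // candidates: 0.1, 1, 10
+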
+class CV_EXPORTS_W CvNormalBayesClassifier : public CvStatModel
+{
+public:
+ CV_WRAP CvNormalBayesClassifier();
+ virtual ~CvNormalBayesClassifier();
+
+ CvNormalBayesClassifier( const CvMat* trainData, const CvMat* responses,
+ const CvMat* varIdx=0, const CvMat* sampleIdx=0 );
+
+ virtual bool train( const CvMat* trainData, const CvMat* responses,
+ const CvMat* varIdx = 0, const CvMat* sampleIdx=0, bool update=false );
+
+ virtual float predict( const CvMat* samples, CV_OUT CvMat* results=0 ) const;
+ CV_WRAP virtual void clear();
+
+ CV_WRAP CvNormalBayesClassifier( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat() );
+ CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& varIdx = cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
+ bool update=false );
+ CV_WRAP virtual float predict( const cv::Mat& samples, CV_OUT cv::Mat* results=0 ) const;
+
+ virtual void write( CvFileStorage* storage, const char* name ) const;
+ virtual void read( CvFileStorage* storage, CvFileNode* node );
+
+protected:
+ int var_count, var_all;
+ CvMat* var_idx;
+ CvMat* cls_labels;
+ CvMat** count;
+ CvMat** sum;
+ CvMat** productsum;
+ CvMat** avg;
+ CvMat** inv_eigen_values;
+ CvMat** cov_rotate_mats;
+ CvMat* c;
+};
+
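+// A minimal usage sketch (the `data`/`labels` matrices are hypothetical):
+// train on CV_32F feature rows with one class id per sample, then predict a
+// class label for a single row.
+//
+//   cv::Mat data(100, 4, CV_32FC1), labels(100, 1, CV_32FC1);
+//   // ... fill data with features and labels with class ids ...
+//   CvNormalBayesClassifier nb;
+//   nb.train(data, labels);
+//   float cls = nb.predict(data.row(0));
+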
+
+/****************************************************************************************\
+* K-Nearest Neighbour Classifier *
+\****************************************************************************************/
+
+// k Nearest Neighbors
+class CV_EXPORTS_W CvKNearest : public CvStatModel
+{
+public:
+
+ CV_WRAP CvKNearest();
+ virtual ~CvKNearest();
+
+ CvKNearest( const CvMat* trainData, const CvMat* responses,
+ const CvMat* sampleIdx=0, bool isRegression=false, int max_k=32 );
+
+ virtual bool train( const CvMat* trainData, const CvMat* responses,
+ const CvMat* sampleIdx=0, bool is_regression=false,
+ int maxK=32, bool updateBase=false );
+
+ virtual float find_nearest( const CvMat* samples, int k, CV_OUT CvMat* results=0,
+ const float** neighbors=0, CV_OUT CvMat* neighborResponses=0, CV_OUT CvMat* dist=0 ) const;
+
+ CV_WRAP CvKNearest( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false, int max_k=32 );
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
+ int maxK=32, bool updateBase=false );
+
+ virtual float find_nearest( const cv::Mat& samples, int k, cv::Mat* results=0,
+ const float** neighbors=0, cv::Mat* neighborResponses=0,
+ cv::Mat* dist=0 ) const;
+ CV_WRAP virtual float find_nearest( const cv::Mat& samples, int k, CV_OUT cv::Mat& results,
+ CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const;
+
+ virtual void clear();
+ int get_max_k() const;
+ int get_var_count() const;
+ int get_sample_count() const;
+ bool is_regression() const;
+
+ virtual float write_results( int k, int k1, int start, int end,
+ const float* neighbor_responses, const float* dist, CvMat* _results,
+ CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;
+
+ virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
+ float* neighbor_responses, const float** neighbors, float* dist ) const;
+
+protected:
+
+ int max_k, var_count;
+ int total;
+ bool regression;
+ CvVectors* samples;
+};
+
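+// A minimal usage sketch (`data`, `labels` and `query` are hypothetical):
+// classify a query row by voting among its k nearest training samples.
+//
+//   CvKNearest knn(data, labels);   // classification mode, max_k = 32
+//   cv::Mat results, neighborResponses, dists;
+//   float cls = knn.find_nearest(query, 5, results, neighborResponses, dists);
+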
+/****************************************************************************************\
+* Support Vector Machines *
+\****************************************************************************************/
+
+// SVM training parameters
+struct CV_EXPORTS_W_MAP CvSVMParams
+{
+ CvSVMParams();
+ CvSVMParams( int svm_type, int kernel_type,
+ double degree, double gamma, double coef0,
+ double Cvalue, double nu, double p,
+ CvMat* class_weights, CvTermCriteria term_crit );
+
+ CV_PROP_RW int svm_type;
+ CV_PROP_RW int kernel_type;
+ CV_PROP_RW double degree; // for poly
+ CV_PROP_RW double gamma; // for poly/rbf/sigmoid/chi2
+ CV_PROP_RW double coef0; // for poly/sigmoid
+
+ CV_PROP_RW double C; // for CV_SVM_C_SVC, CV_SVM_EPS_SVR and CV_SVM_NU_SVR
+ CV_PROP_RW double nu; // for CV_SVM_NU_SVC, CV_SVM_ONE_CLASS, and CV_SVM_NU_SVR
+ CV_PROP_RW double p; // for CV_SVM_EPS_SVR
+ CvMat* class_weights; // for CV_SVM_C_SVC
+ CV_PROP_RW CvTermCriteria term_crit; // termination criteria
+};
+
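+// A minimal configuration sketch: a C-SVC with an RBF kernel (the numeric
+// values are placeholders, not recommended defaults).
+//
+//   CvSVMParams p;
+//   p.svm_type    = CvSVM::C_SVC;
+//   p.kernel_type = CvSVM::RBF;
+//   p.gamma       = 0.5;
+//   p.C           = 1.0;
+//   p.term_crit   = cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 1e-6);
+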
+
+struct CV_EXPORTS CvSVMKernel
+{
+ typedef void (CvSVMKernel::*Calc)( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+ CvSVMKernel();
+ CvSVMKernel( const CvSVMParams* params, Calc _calc_func );
+ virtual bool create( const CvSVMParams* params, Calc _calc_func );
+ virtual ~CvSVMKernel();
+
+ virtual void clear();
+ virtual void calc( int vcount, int n, const float** vecs, const float* another, float* results );
+
+ const CvSVMParams* params;
+ Calc calc_func;
+
+ virtual void calc_non_rbf_base( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results,
+ double alpha, double beta );
+ virtual void calc_intersec( int vcount, int var_count, const float** vecs,
+ const float* another, float* results );
+ virtual void calc_chi2( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+ virtual void calc_linear( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+ virtual void calc_rbf( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+ virtual void calc_poly( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+ virtual void calc_sigmoid( int vec_count, int vec_size, const float** vecs,
+ const float* another, float* results );
+};
+
+
+struct CvSVMKernelRow
+{
+ CvSVMKernelRow* prev;
+ CvSVMKernelRow* next;
+ float* data;
+};
+
+
+struct CvSVMSolutionInfo
+{
+ double obj;
+ double rho;
+ double upper_bound_p;
+ double upper_bound_n;
+ double r; // for Solver_NU
+};
+
+class CV_EXPORTS CvSVMSolver
+{
+public:
+ typedef bool (CvSVMSolver::*SelectWorkingSet)( int& i, int& j );
+ typedef float* (CvSVMSolver::*GetRow)( int i, float* row, float* dst, bool existed );
+ typedef void (CvSVMSolver::*CalcRho)( double& rho, double& r );
+
+ CvSVMSolver();
+
+ CvSVMSolver( int count, int var_count, const float** samples, schar* y,
+ int alpha_count, double* alpha, double Cp, double Cn,
+ CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
+ SelectWorkingSet select_working_set, CalcRho calc_rho );
+ virtual bool create( int count, int var_count, const float** samples, schar* y,
+ int alpha_count, double* alpha, double Cp, double Cn,
+ CvMemStorage* storage, CvSVMKernel* kernel, GetRow get_row,
+ SelectWorkingSet select_working_set, CalcRho calc_rho );
+ virtual ~CvSVMSolver();
+
+ virtual void clear();
+ virtual bool solve_generic( CvSVMSolutionInfo& si );
+
+ virtual bool solve_c_svc( int count, int var_count, const float** samples, schar* y,
+ double Cp, double Cn, CvMemStorage* storage,
+ CvSVMKernel* kernel, double* alpha, CvSVMSolutionInfo& si );
+ virtual bool solve_nu_svc( int count, int var_count, const float** samples, schar* y,
+ CvMemStorage* storage, CvSVMKernel* kernel,
+ double* alpha, CvSVMSolutionInfo& si );
+ virtual bool solve_one_class( int count, int var_count, const float** samples,
+ CvMemStorage* storage, CvSVMKernel* kernel,
+ double* alpha, CvSVMSolutionInfo& si );
+
+ virtual bool solve_eps_svr( int count, int var_count, const float** samples, const float* y,
+ CvMemStorage* storage, CvSVMKernel* kernel,
+ double* alpha, CvSVMSolutionInfo& si );
+
+ virtual bool solve_nu_svr( int count, int var_count, const float** samples, const float* y,
+ CvMemStorage* storage, CvSVMKernel* kernel,
+ double* alpha, CvSVMSolutionInfo& si );
+
+ virtual float* get_row_base( int i, bool* _existed );
+ virtual float* get_row( int i, float* dst );
+
+ int sample_count;
+ int var_count;
+ int cache_size;
+ int cache_line_size;
+ const float** samples;
+ const CvSVMParams* params;
+ CvMemStorage* storage;
+ CvSVMKernelRow lru_list;
+ CvSVMKernelRow* rows;
+
+ int alpha_count;
+
+ double* G;
+ double* alpha;
+
+ // -1 - lower bound, 0 - free, 1 - upper bound
+ schar* alpha_status;
+
+ schar* y;
+ double* b;
+ float* buf[2];
+ double eps;
+ int max_iter;
+ double C[2]; // C[0] == Cn, C[1] == Cp
+ CvSVMKernel* kernel;
+
+ SelectWorkingSet select_working_set_func;
+ CalcRho calc_rho_func;
+ GetRow get_row_func;
+
+ virtual bool select_working_set( int& i, int& j );
+ virtual bool select_working_set_nu_svm( int& i, int& j );
+ virtual void calc_rho( double& rho, double& r );
+ virtual void calc_rho_nu_svm( double& rho, double& r );
+
+ virtual float* get_row_svc( int i, float* row, float* dst, bool existed );
+ virtual float* get_row_one_class( int i, float* row, float* dst, bool existed );
+ virtual float* get_row_svr( int i, float* row, float* dst, bool existed );
+};
+
+
+struct CvSVMDecisionFunc
+{
+ double rho;
+ int sv_count;
+ double* alpha;
+ int* sv_index;
+};
+
+
+// SVM model
+class CV_EXPORTS_W CvSVM : public CvStatModel
+{
+public:
+ // SVM type
+ enum { C_SVC=100, NU_SVC=101, ONE_CLASS=102, EPS_SVR=103, NU_SVR=104 };
+
+ // SVM kernel type
+ enum { LINEAR=0, POLY=1, RBF=2, SIGMOID=3, CHI2=4, INTER=5 };
+
+ // SVM params type
+ enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 };
+
+ CV_WRAP CvSVM();
+ virtual ~CvSVM();
+
+ CvSVM( const CvMat* trainData, const CvMat* responses,
+ const CvMat* varIdx=0, const CvMat* sampleIdx=0,
+ CvSVMParams params=CvSVMParams() );
+
+ virtual bool train( const CvMat* trainData, const CvMat* responses,
+ const CvMat* varIdx=0, const CvMat* sampleIdx=0,
+ CvSVMParams params=CvSVMParams() );
+
+ virtual bool train_auto( const CvMat* trainData, const CvMat* responses,
+ const CvMat* varIdx, const CvMat* sampleIdx, CvSVMParams params,
+ int kfold = 10,
+ CvParamGrid Cgrid = get_default_grid(CvSVM::C),
+ CvParamGrid gammaGrid = get_default_grid(CvSVM::GAMMA),
+ CvParamGrid pGrid = get_default_grid(CvSVM::P),
+ CvParamGrid nuGrid = get_default_grid(CvSVM::NU),
+ CvParamGrid coeffGrid = get_default_grid(CvSVM::COEF),
+ CvParamGrid degreeGrid = get_default_grid(CvSVM::DEGREE),
+ bool balanced=false );
+
+ virtual float predict( const CvMat* sample, bool returnDFVal=false ) const;
+ virtual float predict( const CvMat* samples, CV_OUT CvMat* results ) const;
+
+ CV_WRAP CvSVM( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
+ CvSVMParams params=CvSVMParams() );
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
+ CvSVMParams params=CvSVMParams() );
+
+ CV_WRAP virtual bool train_auto( const cv::Mat& trainData, const cv::Mat& responses,
+ const cv::Mat& varIdx, const cv::Mat& sampleIdx, CvSVMParams params,
+ int k_fold = 10,
+ CvParamGrid Cgrid = CvSVM::get_default_grid(CvSVM::C),
+ CvParamGrid gammaGrid = CvSVM::get_default_grid(CvSVM::GAMMA),
+ CvParamGrid pGrid = CvSVM::get_default_grid(CvSVM::P),
+ CvParamGrid nuGrid = CvSVM::get_default_grid(CvSVM::NU),
+ CvParamGrid coeffGrid = CvSVM::get_default_grid(CvSVM::COEF),
+ CvParamGrid degreeGrid = CvSVM::get_default_grid(CvSVM::DEGREE),
+ bool balanced=false);
+ CV_WRAP virtual float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
+ CV_WRAP_AS(predict_all) virtual void predict( cv::InputArray samples, cv::OutputArray results ) const;
+
+ CV_WRAP virtual int get_support_vector_count() const;
+ virtual const float* get_support_vector(int i) const;
+ virtual CvSVMParams get_params() const { return params; };
+ CV_WRAP virtual void clear();
+
+ static CvParamGrid get_default_grid( int param_id );
+
+ virtual void write( CvFileStorage* storage, const char* name ) const;
+ virtual void read( CvFileStorage* storage, CvFileNode* node );
+ CV_WRAP int get_var_count() const { return var_idx ? var_idx->cols : var_all; }
+
+protected:
+
+ virtual bool set_params( const CvSVMParams& params );
+ virtual bool train1( int sample_count, int var_count, const float** samples,
+ const void* responses, double Cp, double Cn,
+ CvMemStorage* _storage, double* alpha, double& rho );
+ virtual bool do_train( int svm_type, int sample_count, int var_count, const float** samples,
+ const CvMat* responses, CvMemStorage* _storage, double* alpha );
+ virtual void create_kernel();
+ virtual void create_solver();
+
+ virtual float predict( const float* row_sample, int row_len, bool returnDFVal=false ) const;
+
+ virtual void write_params( CvFileStorage* fs ) const;
+ virtual void read_params( CvFileStorage* fs, CvFileNode* node );
+
++ void optimize_linear_svm();
++
+ CvSVMParams params;
+ CvMat* class_labels;
+ int var_all;
+ float** sv;
+ int sv_total;
+ CvMat* var_idx;
+ CvMat* class_weights;
+ CvSVMDecisionFunc* decision_func;
+ CvMemStorage* storage;
+
+ CvSVMSolver* solver;
+ CvSVMKernel* kernel;
+};
+
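+// A sketch of automatic parameter selection (`data`, `labels`, `params` and
+// `sample` are hypothetical): train_auto cross-validates over the default
+// grids and keeps the best parameter combination for the chosen kernel.
+//
+//   CvSVM svm;
+//   svm.train_auto(data, labels, cv::Mat(), cv::Mat(), params, 10);
+//   float response = svm.predict(sample);
+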
+/****************************************************************************************\
+* Expectation - Maximization *
+\****************************************************************************************/
+namespace cv
+{
+class CV_EXPORTS_W EM : public Algorithm
+{
+public:
+ // Type of covariation matrices
+ enum {COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2, COV_MAT_DEFAULT=COV_MAT_DIAGONAL};
+
+ // Default parameters
+ enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};
+
+ // The initial step
+ enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
+
+ CV_WRAP EM(int nclusters=EM::DEFAULT_NCLUSTERS, int covMatType=EM::COV_MAT_DIAGONAL,
+ const TermCriteria& termCrit=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS,
+ EM::DEFAULT_MAX_ITERS, FLT_EPSILON));
+
+ virtual ~EM();
+ CV_WRAP virtual void clear();
+
+ CV_WRAP virtual bool train(InputArray samples,
+ OutputArray logLikelihoods=noArray(),
+ OutputArray labels=noArray(),
+ OutputArray probs=noArray());
+
+ CV_WRAP virtual bool trainE(InputArray samples,
+ InputArray means0,
+ InputArray covs0=noArray(),
+ InputArray weights0=noArray(),
+ OutputArray logLikelihoods=noArray(),
+ OutputArray labels=noArray(),
+ OutputArray probs=noArray());
+
+ CV_WRAP virtual bool trainM(InputArray samples,
+ InputArray probs0,
+ OutputArray logLikelihoods=noArray(),
+ OutputArray labels=noArray(),
+ OutputArray probs=noArray());
+
+ CV_WRAP Vec2d predict(InputArray sample,
+ OutputArray probs=noArray()) const;
+
+ CV_WRAP bool isTrained() const;
+
+ AlgorithmInfo* info() const;
+ virtual void read(const FileNode& fn);
+
+protected:
+
+ virtual void setTrainData(int startStep, const Mat& samples,
+ const Mat* probs0,
+ const Mat* means0,
+ const std::vector<Mat>* covs0,
+ const Mat* weights0);
+
+ bool doTrain(int startStep,
+ OutputArray logLikelihoods,
+ OutputArray labels,
+ OutputArray probs);
+ virtual void eStep();
+ virtual void mStep();
+
+ void clusterTrainSamples();
+ void decomposeCovs();
+ void computeLogWeightDivDet();
+
+ Vec2d computeProbabilities(const Mat& sample, Mat* probs) const;
+
+ // all inner matrices have type CV_64FC1
+ CV_PROP_RW int nclusters;
+ CV_PROP_RW int covMatType;
+ CV_PROP_RW int maxIters;
+ CV_PROP_RW double epsilon;
+
+ Mat trainSamples;
+ Mat trainProbs;
+ Mat trainLogLikelihoods;
+ Mat trainLabels;
+
+ CV_PROP Mat weights;
+ CV_PROP Mat means;
+ CV_PROP std::vector<Mat> covs;
+
+ std::vector<Mat> covsEigenValues;
+ std::vector<Mat> covsRotateMats;
+ std::vector<Mat> invCovsEigenValues;
+ Mat logWeightDivDet;
+};
+} // namespace cv
+
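+// A minimal usage sketch (`samples` and `sample` are hypothetical): fit a
+// 3-component Gaussian mixture and query the most likely component.
+//
+//   cv::EM em(3);                        // 3 mixture components
+//   em.train(samples);                   // samples: one feature vector per row
+//   cv::Vec2d r = em.predict(sample);    // r[0]: likelihood logarithm,
+//                                        // r[1]: index of the best component
+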
+/****************************************************************************************\
+* Decision Tree *
+\****************************************************************************************/
+struct CvPair16u32s
+{
+ unsigned short* u;
+ int* i;
+};
+
+
+#define CV_DTREE_CAT_DIR(idx,subset) \
+ (2*((subset[(idx)>>5]&(1 << ((idx) & 31)))==0)-1)
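+
+// CV_DTREE_CAT_DIR reads bit (idx) of the bitmask packed into the int array
+// (subset), 32 category bits per element: it yields -1 when the bit is set
+// and +1 when it is clear, i.e. the branch a categorical value is sent to.
+// For example, idx == 40 tests bit 8 of subset[1].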
+
+struct CvDTreeSplit
+{
+ int var_idx;
+ int condensed_idx;
+ int inversed;
+ float quality;
+ CvDTreeSplit* next;
+ union
+ {
+ int subset[2];
+ struct
+ {
+ float c;
+ int split_point;
+ }
+ ord;
+ };
+};
+
+struct CvDTreeNode
+{
+ int class_idx;
+ int Tn;
+ double value;
+
+ CvDTreeNode* parent;
+ CvDTreeNode* left;
+ CvDTreeNode* right;
+
+ CvDTreeSplit* split;
+
+ int sample_count;
+ int depth;
+ int* num_valid;
+ int offset;
+ int buf_idx;
+ double maxlr;
+
+ // global pruning data
+ int complexity;
+ double alpha;
+ double node_risk, tree_risk, tree_error;
+
+ // cross-validation pruning data
+ int* cv_Tn;
+ double* cv_node_risk;
+ double* cv_node_error;
+
+ int get_num_valid(int vi) { return num_valid ? num_valid[vi] : sample_count; }
+ void set_num_valid(int vi, int n) { if( num_valid ) num_valid[vi] = n; }
+};
+
+
+struct CV_EXPORTS_W_MAP CvDTreeParams
+{
+ CV_PROP_RW int max_categories;
+ CV_PROP_RW int max_depth;
+ CV_PROP_RW int min_sample_count;
+ CV_PROP_RW int cv_folds;
+ CV_PROP_RW bool use_surrogates;
+ CV_PROP_RW bool use_1se_rule;
+ CV_PROP_RW bool truncate_pruned_tree;
+ CV_PROP_RW float regression_accuracy;
+ const float* priors;
+
+ CvDTreeParams();
+ CvDTreeParams( int max_depth, int min_sample_count,
+ float regression_accuracy, bool use_surrogates,
+ int max_categories, int cv_folds,
+ bool use_1se_rule, bool truncate_pruned_tree,
+ const float* priors );
+};
+
+
+struct CV_EXPORTS CvDTreeTrainData
+{
+ CvDTreeTrainData();
+ CvDTreeTrainData( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ const CvDTreeParams& params=CvDTreeParams(),
+ bool _shared=false, bool _add_labels=false );
+ virtual ~CvDTreeTrainData();
+
+ virtual void set_data( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ const CvDTreeParams& params=CvDTreeParams(),
+ bool _shared=false, bool _add_labels=false,
+ bool _update_data=false );
+ virtual void do_responses_copy();
+
+ virtual void get_vectors( const CvMat* _subsample_idx,
+ float* values, uchar* missing, float* responses, bool get_class_idx=false );
+
+ virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
+
+ virtual void write_params( CvFileStorage* fs ) const;
+ virtual void read_params( CvFileStorage* fs, CvFileNode* node );
+
+ // release all the data
+ virtual void clear();
+
+ int get_num_classes() const;
+ int get_var_type(int vi) const;
+ int get_work_var_count() const {return work_var_count;}
+
+ virtual const float* get_ord_responses( CvDTreeNode* n, float* values_buf, int* sample_indices_buf );
+ virtual const int* get_class_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
+ virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
+ virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* sorted_indices_buf,
+ const float** ord_values, const int** sorted_indices, int* sample_indices_buf );
+ virtual int get_child_buf_idx( CvDTreeNode* n );
+
+ ////////////////////////////////////
+
+ virtual bool set_params( const CvDTreeParams& params );
+ virtual CvDTreeNode* new_node( CvDTreeNode* parent, int count,
+ int storage_idx, int offset );
+
+ virtual CvDTreeSplit* new_split_ord( int vi, float cmp_val,
+ int split_point, int inversed, float quality );
+ virtual CvDTreeSplit* new_split_cat( int vi, float quality );
+ virtual void free_node_data( CvDTreeNode* node );
+ virtual void free_train_data();
+ virtual void free_node( CvDTreeNode* node );
+
+ int sample_count, var_all, var_count, max_c_count;
+ int ord_var_count, cat_var_count, work_var_count;
+ bool have_labels, have_priors;
+ bool is_classifier;
+ int tflag;
+
+ const CvMat* train_data;
+ const CvMat* responses;
+ CvMat* responses_copy; // used in Boosting
+
+ int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead
+ bool shared;
+ int is_buf_16u;
+
+ CvMat* cat_count;
+ CvMat* cat_ofs;
+ CvMat* cat_map;
+
+ CvMat* counts;
+ CvMat* buf;
+ inline size_t get_length_subbuf() const
+ {
+ size_t res = (size_t)(work_var_count + 1) * (size_t)sample_count;
+ return res;
+ }
+
+ CvMat* direction;
+ CvMat* split_buf;
+
+ CvMat* var_idx;
+ CvMat* var_type; // i-th element =
+ // k<0 - ordered
+ // k>=0 - categorical, see k-th element of cat_* arrays
+ CvMat* priors;
+ CvMat* priors_mult;
+
+ CvDTreeParams params;
+
+ CvMemStorage* tree_storage;
+ CvMemStorage* temp_storage;
+
+ CvDTreeNode* data_root;
+
+ CvSet* node_heap;
+ CvSet* split_heap;
+ CvSet* cv_heap;
+ CvSet* nv_heap;
+
+ cv::RNG* rng;
+};
+
+class CvDTree;
+class CvForestTree;
+
+namespace cv
+{
+ struct DTreeBestSplitFinder;
+ struct ForestTreeBestSplitFinder;
+}
+
+class CV_EXPORTS_W CvDTree : public CvStatModel
+{
+public:
+ CV_WRAP CvDTree();
+ virtual ~CvDTree();
+
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvDTreeParams params=CvDTreeParams() );
+
+ virtual bool train( CvMLData* trainData, CvDTreeParams params=CvDTreeParams() );
+
+ // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
+ virtual float calc_error( CvMLData* trainData, int type, std::vector<float> *resp = 0 );
+
+ virtual bool train( CvDTreeTrainData* trainData, const CvMat* subsampleIdx );
+
+ virtual CvDTreeNode* predict( const CvMat* sample, const CvMat* missingDataMask=0,
+ bool preprocessedInput=false ) const;
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvDTreeParams params=CvDTreeParams() );
+
+ CV_WRAP virtual CvDTreeNode* predict( const cv::Mat& sample, const cv::Mat& missingDataMask=cv::Mat(),
+ bool preprocessedInput=false ) const;
+ CV_WRAP virtual cv::Mat getVarImportance();
+
+ virtual const CvMat* get_var_importance();
+ CV_WRAP virtual void clear();
+
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void write( CvFileStorage* fs, const char* name ) const;
+
+ // special read & write methods for trees in the tree ensembles
+ virtual void read( CvFileStorage* fs, CvFileNode* node,
+ CvDTreeTrainData* data );
+ virtual void write( CvFileStorage* fs ) const;
+
+ const CvDTreeNode* get_root() const;
+ int get_pruned_tree_idx() const;
+ CvDTreeTrainData* get_data();
+
+protected:
+ friend struct cv::DTreeBestSplitFinder;
+
+ virtual bool do_train( const CvMat* _subsample_idx );
+
+ virtual void try_split_node( CvDTreeNode* n );
+ virtual void split_node_data( CvDTreeNode* n );
+ virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
+ virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual double calc_node_dir( CvDTreeNode* node );
+ virtual void complete_node_dir( CvDTreeNode* node );
+ virtual void cluster_categories( const int* vectors, int vector_count,
+ int var_count, int* sums, int k, int* cluster_labels );
+
+ virtual void calc_node_value( CvDTreeNode* node );
+
+ virtual void prune_cv();
+ virtual double update_tree_rnc( int T, int fold );
+ virtual int cut_tree( int T, int fold, double min_alpha );
+ virtual void free_prune_data(bool cut_tree);
+ virtual void free_tree();
+
+ virtual void write_node( CvFileStorage* fs, CvDTreeNode* node ) const;
+ virtual void write_split( CvFileStorage* fs, CvDTreeSplit* split ) const;
+ virtual CvDTreeNode* read_node( CvFileStorage* fs, CvFileNode* node, CvDTreeNode* parent );
+ virtual CvDTreeSplit* read_split( CvFileStorage* fs, CvFileNode* node );
+ virtual void write_tree_nodes( CvFileStorage* fs ) const;
+ virtual void read_tree_nodes( CvFileStorage* fs, CvFileNode* node );
+
+ CvDTreeNode* root;
+ CvMat* var_importance;
+ CvDTreeTrainData* data;
+
+public:
+ int pruned_tree_idx;
+};
+
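+// A minimal training sketch (`data`, `labels` and `sample` are
+// hypothetical). With the default varType all variables, including the
+// response, are treated as ordered, i.e. regression; pass a varType vector
+// marking the response CV_VAR_CATEGORICAL for classification.
+//
+//   CvDTree tree;
+//   tree.train(data, CV_ROW_SAMPLE, labels);
+//   double prediction = tree.predict(sample)->value;
+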
+
+/****************************************************************************************\
+* Random Trees Classifier *
+\****************************************************************************************/
+
+class CvRTrees;
+
+class CV_EXPORTS CvForestTree: public CvDTree
+{
+public:
+ CvForestTree();
+ virtual ~CvForestTree();
+
+ virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx, CvRTrees* forest );
+
+ virtual int get_var_count() const {return data ? data->var_count : 0;}
+ virtual void read( CvFileStorage* fs, CvFileNode* node, CvRTrees* forest, CvDTreeTrainData* _data );
+
+ /* dummy methods to avoid warnings: BEGIN */
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvDTreeParams params=CvDTreeParams() );
+
+ virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void read( CvFileStorage* fs, CvFileNode* node,
+ CvDTreeTrainData* data );
+ /* dummy methods to avoid warnings: END */
+
+protected:
+ friend struct cv::ForestTreeBestSplitFinder;
+
+ virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
+ CvRTrees* forest;
+};
+
+
+struct CV_EXPORTS_W_MAP CvRTParams : public CvDTreeParams
+{
+ //Parameters for the forest
+ CV_PROP_RW bool calc_var_importance; // true <=> RF processes variable importance
+ CV_PROP_RW int nactive_vars;
+ CV_PROP_RW CvTermCriteria term_crit;
+
+ CvRTParams();
+ CvRTParams( int max_depth, int min_sample_count,
+ float regression_accuracy, bool use_surrogates,
+ int max_categories, const float* priors, bool calc_var_importance,
+ int nactive_vars, int max_num_of_trees_in_the_forest,
+ float forest_accuracy, int termcrit_type );
+};
+
+
+class CV_EXPORTS_W CvRTrees : public CvStatModel
+{
+public:
+ CV_WRAP CvRTrees();
+ virtual ~CvRTrees();
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvRTParams params=CvRTParams() );
+
+ virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
+ virtual float predict( const CvMat* sample, const CvMat* missing = 0 ) const;
+ virtual float predict_prob( const CvMat* sample, const CvMat* missing = 0 ) const;
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvRTParams params=CvRTParams() );
+ CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
+ CV_WRAP virtual float predict_prob( const cv::Mat& sample, const cv::Mat& missing = cv::Mat() ) const;
+ CV_WRAP virtual cv::Mat getVarImportance();
+
+ CV_WRAP virtual void clear();
+
+ virtual const CvMat* get_var_importance();
+ virtual float get_proximity( const CvMat* sample1, const CvMat* sample2,
+ const CvMat* missing1 = 0, const CvMat* missing2 = 0 ) const;
+
+ virtual float calc_error( CvMLData* data, int type , std::vector<float>* resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
+
+ virtual float get_train_error();
+
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void write( CvFileStorage* fs, const char* name ) const;
+
+ CvMat* get_active_var_mask();
+ CvRNG* get_rng();
+
+ int get_tree_count() const;
+ CvForestTree* get_tree(int i) const;
+
+protected:
+ virtual std::string getName() const;
+
+ virtual bool grow_forest( const CvTermCriteria term_crit );
+
+ // array of the trees of the forest
+ CvForestTree** trees;
+ CvDTreeTrainData* data;
+ int ntrees;
+ int nclasses;
+ double oob_error;
+ CvMat* var_importance;
+ int nsamples;
+
+ cv::RNG* rng;
+ CvMat* active_var_mask;
+};
+
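+// A minimal usage sketch (`data`, `labels` and `sample` are hypothetical):
+// the forest aggregates the votes (or averages the responses) of its trees;
+// predict_prob is meaningful for 2-class problems only.
+//
+//   CvRTrees forest;
+//   forest.train(data, CV_ROW_SAMPLE, labels);
+//   float response = forest.predict(sample);
+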
+/****************************************************************************************\
+* Extremely randomized trees Classifier *
+\****************************************************************************************/
+struct CV_EXPORTS CvERTreeTrainData : public CvDTreeTrainData
+{
+ virtual void set_data( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ const CvDTreeParams& params=CvDTreeParams(),
+ bool _shared=false, bool _add_labels=false,
+ bool _update_data=false );
+ virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
+ const float** ord_values, const int** missing, int* sample_buf = 0 );
+ virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
+ virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
+ virtual void get_vectors( const CvMat* _subsample_idx, float* values, uchar* missing,
+ float* responses, bool get_class_idx=false );
+ virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
+ const CvMat* missing_mask;
+};
+
+class CV_EXPORTS CvForestERTree : public CvForestTree
+{
+protected:
+ virtual double calc_node_dir( CvDTreeNode* node );
+ virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual void split_node_data( CvDTreeNode* n );
+};
+
+class CV_EXPORTS_W CvERTrees : public CvRTrees
+{
+public:
+ CV_WRAP CvERTrees();
+ virtual ~CvERTrees();
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvRTParams params=CvRTParams());
+ CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvRTParams params=CvRTParams());
+ virtual bool train( CvMLData* data, CvRTParams params=CvRTParams() );
+protected:
+ virtual std::string getName() const;
+ virtual bool grow_forest( const CvTermCriteria term_crit );
+};
+
+
+/****************************************************************************************\
+* Boosted tree classifier *
+\****************************************************************************************/
+
+struct CV_EXPORTS_W_MAP CvBoostParams : public CvDTreeParams
+{
+ CV_PROP_RW int boost_type;
+ CV_PROP_RW int weak_count;
+ CV_PROP_RW int split_criteria;
+ CV_PROP_RW double weight_trim_rate;
+
+ CvBoostParams();
+ CvBoostParams( int boost_type, int weak_count, double weight_trim_rate,
+ int max_depth, bool use_surrogates, const float* priors );
+};
+
+
+class CvBoost;
+
+class CV_EXPORTS CvBoostTree: public CvDTree
+{
+public:
+ CvBoostTree();
+ virtual ~CvBoostTree();
+
+ virtual bool train( CvDTreeTrainData* trainData,
+ const CvMat* subsample_idx, CvBoost* ensemble );
+
+ virtual void scale( double s );
+ virtual void read( CvFileStorage* fs, CvFileNode* node,
+ CvBoost* ensemble, CvDTreeTrainData* _data );
+ virtual void clear();
+
+ /* dummy methods to avoid warnings: BEGIN */
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvDTreeParams params=CvDTreeParams() );
+ virtual bool train( CvDTreeTrainData* trainData, const CvMat* _subsample_idx );
+
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void read( CvFileStorage* fs, CvFileNode* node,
+ CvDTreeTrainData* data );
+ /* dummy methods to avoid warnings: END */
+
+protected:
+
+ virtual void try_split_node( CvDTreeNode* n );
+ virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual void calc_node_value( CvDTreeNode* n );
+ virtual double calc_node_dir( CvDTreeNode* n );
+
+ CvBoost* ensemble;
+};
+
+
+class CV_EXPORTS_W CvBoost : public CvStatModel
+{
+public:
+ // Boosting type
+ enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 };
+
+ // Splitting criteria
+ enum { DEFAULT=0, GINI=1, MISCLASS=3, SQERR=4 };
+
+ CV_WRAP CvBoost();
+ virtual ~CvBoost();
+
+ CvBoost( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvBoostParams params=CvBoostParams() );
+
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvBoostParams params=CvBoostParams(),
+ bool update=false );
+
+ virtual bool train( CvMLData* data,
+ CvBoostParams params=CvBoostParams(),
+ bool update=false );
+
+ virtual float predict( const CvMat* sample, const CvMat* missing=0,
+ CvMat* weak_responses=0, CvSlice slice=CV_WHOLE_SEQ,
+ bool raw_mode=false, bool return_sum=false ) const;
+
+ CV_WRAP CvBoost( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvBoostParams params=CvBoostParams() );
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvBoostParams params=CvBoostParams(),
+ bool update=false );
+
+ CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
+ const cv::Range& slice=cv::Range::all(), bool rawMode=false,
+ bool returnSum=false ) const;
+
+ virtual float calc_error( CvMLData* _data, int type , std::vector<float> *resp = 0 ); // type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
+
+ CV_WRAP virtual void prune( CvSlice slice );
+
+ CV_WRAP virtual void clear();
+
+ virtual void write( CvFileStorage* storage, const char* name ) const;
+ virtual void read( CvFileStorage* storage, CvFileNode* node );
+ virtual const CvMat* get_active_vars(bool absolute_idx=true);
+
+ CvSeq* get_weak_predictors();
+
+ CvMat* get_weights();
+ CvMat* get_subtree_weights();
+ CvMat* get_weak_response();
+ const CvBoostParams& get_params() const;
+ const CvDTreeTrainData* get_data() const;
+
+protected:
+
+ virtual bool set_params( const CvBoostParams& params );
+ virtual void update_weights( CvBoostTree* tree );
+ virtual void trim_weights();
+ virtual void write_params( CvFileStorage* fs ) const;
+ virtual void read_params( CvFileStorage* fs, CvFileNode* node );
+
+ virtual void initialize_weights(double (&p)[2]);
+
+ CvDTreeTrainData* data;
+ CvBoostParams params;
+ CvSeq* weak;
+
+ CvMat* active_vars;
+ CvMat* active_vars_abs;
+ bool have_active_cat_vars;
+
+ CvMat* orig_response;
+ CvMat* sum_response;
+ CvMat* weak_eval;
+ CvMat* subsample_mask;
+ CvMat* weights;
+ CvMat* subtree_weights;
+ bool have_subsample;
+};
+
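+// A minimal usage sketch (`data`, `labels` and `sample` are hypothetical):
+// this interface is a two-class classifier; here, Real AdaBoost with 100
+// depth-1 weak trees and a 0.95 weight trim rate.
+//
+//   CvBoostParams bp(CvBoost::REAL, 100, 0.95, 1, false, 0);
+//   CvBoost boost;
+//   boost.train(data, CV_ROW_SAMPLE, labels, cv::Mat(), cv::Mat(),
+//               cv::Mat(), cv::Mat(), bp);
+//   float cls = boost.predict(sample);
+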
+
+/****************************************************************************************\
+* Gradient Boosted Trees *
+\****************************************************************************************/
+
+// DataType: STRUCT CvGBTreesParams
+// Parameters of GBT (Gradient Boosted trees model), including single
+// tree settings and ensemble parameters.
+//
+// weak_count - count of trees in the ensemble
+// loss_function_type - loss function used for ensemble training
+// subsample_portion - portion of the whole training set used for
+// training every single tree.
+// subsample_portion value is in (0.0, 1.0].
+// subsample_portion == 1.0 when the whole dataset is
+// used on each step. The count of samples used on each
+// step is computed as
+// int(total_samples_count * subsample_portion).
+// shrinkage - regularization parameter.
+// Each tree prediction is multiplied by the shrinkage value.
+
+
+struct CV_EXPORTS_W_MAP CvGBTreesParams : public CvDTreeParams
+{
+ CV_PROP_RW int weak_count;
+ CV_PROP_RW int loss_function_type;
+ CV_PROP_RW float subsample_portion;
+ CV_PROP_RW float shrinkage;
+
+ CvGBTreesParams();
+ CvGBTreesParams( int loss_function_type, int weak_count, float shrinkage,
+ float subsample_portion, int max_depth, bool use_surrogates );
+};
+
+// DataType: CLASS CvGBTrees
+// Gradient Boosting Trees (GBT) algorithm implementation.
+//
+// data - training dataset
+// params - parameters of the CvGBTrees
+// weak - array[0..(class_count-1)] of CvSeq
+// for storing tree ensembles
+// orig_response - original responses of the training set samples
+// sum_response - predictions of the current model on the training dataset.
+// This matrix is updated on every iteration.
+// sum_response_tmp - predictions of the model on the training set on the next
+// step. On every iteration values of sum_responses_tmp are
+// computed via sum_responses values. When the current
+// step is complete, sum_response values become equal to
+// sum_responses_tmp.
+// sampleIdx - indices of samples used for training the ensemble.
+// CvGBTrees training procedure takes a set of samples
+// (train_data) and a set of responses (responses).
+// Only pairs (train_data[i], responses[i]), where i is
+// in sample_idx are used for training the ensemble.
+// subsample_train - indices of samples used for training a single decision
+// tree on the current step. These indices are counted
+// relative to sample_idx, so that pairs
+// (train_data[sample_idx[i]], responses[sample_idx[i]])
+// are used for training a decision tree.
+// The training set is randomly split
+// into two parts (subsample_train and subsample_test)
+// on every iteration according to the portion parameter.
+// subsample_test - relative indices of samples from the training set,
+// which are not used for training a tree on the current
+// step.
+// missing - mask of the missing values in the training set. This
+// matrix has the same size as train_data. 1 - missing
+// value, 0 - not a missing value.
+// class_labels - output class labels map.
+// rng - random number generator. Used for splitting the
+// training set.
+// class_count - count of output classes.
+// class_count == 1 in the case of regression,
+// and > 1 in the case of classification.
+// delta - Huber loss function parameter.
+// base_value - start point of the gradient descent procedure.
+// model prediction is
+// f(x) = f_0 + sum_{i=1..weak_count-1}(f_i(x)), where
+// f_0 is the base value.
+
+
+
+class CV_EXPORTS_W CvGBTrees : public CvStatModel
+{
+public:
+
+ /*
+ // DataType: ENUM
+ // Loss functions implemented in CvGBTrees.
+ //
+ // SQUARED_LOSS
+ // problem: regression
+ // loss = (x - x')^2
+ //
+ // ABSOLUTE_LOSS
+ // problem: regression
+ // loss = abs(x - x')
+ //
+ // HUBER_LOSS
+ // problem: regression
+ // loss = delta*( abs(x - x') - delta/2), if abs(x - x') > delta
+ // 1/2*(x - x')^2, if abs(x - x') <= delta,
+ // where delta is the alpha-quantile of pseudo responses from
+ // the training set.
+ //
+ // DEVIANCE_LOSS
+ // problem: classification
+ //
+ */
+ enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
+
+
+ /*
+ // Default constructor. Creates a model only (without training).
+ // Should be followed by one form of the train(...) function.
+ //
+ // API
+ // CvGBTrees();
+
+ // INPUT
+ // OUTPUT
+ // RESULT
+ */
+ CV_WRAP CvGBTrees();
+
+
+ /*
+ // Full form constructor. Creates a gradient boosting model and does the
+ // train.
+ //
+ // API
+ // CvGBTrees( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvGBTreesParams params=CvGBTreesParams() );
+
+ // INPUT
+ // trainData - a set of input feature vectors.
+ // size of matrix is
+ // <count of samples> x <variables count>
+ // or <variables count> x <count of samples>
+ // depending on the tflag parameter.
+ // matrix values are float.
+// tflag - a flag showing how samples are stored in the
+// trainData matrix: row by row (tflag=CV_ROW_SAMPLE)
+// or column by column (tflag=CV_COL_SAMPLE).
+ // responses - a vector of responses corresponding to the samples
+ // in trainData.
+ // varIdx - indices of used variables. zero value means that all
+ // variables are active.
+ // sampleIdx - indices of used samples. zero value means that all
+ // samples from trainData are in the training set.
+ // varType - vector of <variables count> length. gives every
+ // variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
+ // varType = 0 means all variables are numerical.
+// missingDataMask - a mask of missing values in trainData.
+ // missingDataMask = 0 means that there are no missing
+ // values.
+ // params - parameters of GTB algorithm.
+ // OUTPUT
+ // RESULT
+ */
+ CvGBTrees( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvGBTreesParams params=CvGBTreesParams() );
+
+
+ /*
+ // Destructor.
+ */
+ virtual ~CvGBTrees();
+
+
+ /*
+ // Gradient tree boosting model training
+ //
+ // API
+ // virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvGBTreesParams params=CvGBTreesParams(),
+ bool update=false );
+
+ // INPUT
+ // trainData - a set of input feature vectors.
+ // size of matrix is
+ // <count of samples> x <variables count>
+ // or <variables count> x <count of samples>
+ // depending on the tflag parameter.
+ // matrix values are float.
+// tflag - a flag showing how samples are stored in the
+// trainData matrix: row by row (tflag=CV_ROW_SAMPLE)
+// or column by column (tflag=CV_COL_SAMPLE).
+ // responses - a vector of responses corresponding to the samples
+ // in trainData.
+ // varIdx - indices of used variables. zero value means that all
+ // variables are active.
+ // sampleIdx - indices of used samples. zero value means that all
+ // samples from trainData are in the training set.
+ // varType - vector of <variables count> length. gives every
+ // variable type CV_VAR_CATEGORICAL or CV_VAR_ORDERED.
+ // varType = 0 means all variables are numerical.
+// missingDataMask - a mask of missing values in trainData.
+ // missingDataMask = 0 means that there are no missing
+ // values.
+ // params - parameters of GTB algorithm.
+ // update - is not supported now. (!)
+ // OUTPUT
+ // RESULT
+ // Error state.
+ */
+ virtual bool train( const CvMat* trainData, int tflag,
+ const CvMat* responses, const CvMat* varIdx=0,
+ const CvMat* sampleIdx=0, const CvMat* varType=0,
+ const CvMat* missingDataMask=0,
+ CvGBTreesParams params=CvGBTreesParams(),
+ bool update=false );
+
+
+ /*
+ // Gradient tree boosting model training
+ //
+ // API
+ // virtual bool train( CvMLData* data,
+ CvGBTreesParams params=CvGBTreesParams(),
+ bool update=false ) {return false;};
+
+ // INPUT
+ // data - training set.
+ // params - parameters of GTB algorithm.
+ // update - is not supported now. (!)
+ // OUTPUT
+ // RESULT
+ // Error state.
+ */
+ virtual bool train( CvMLData* data,
+ CvGBTreesParams params=CvGBTreesParams(),
+ bool update=false );
+
+
+ /*
+ // Response value prediction
+ //
+ // API
+ // virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
+ CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
+ int k=-1 ) const;
+
+ // INPUT
+ // sample - input sample of the same type as in the training set.
+ // missing - missing values mask. missing=0 if there are no
+ // missing values in sample vector.
+ // weak_responses - predictions of all of the trees.
+ // not implemented (!)
+ // slice - part of the ensemble used for prediction.
+ // slice = CV_WHOLE_SEQ when all trees are used.
+ // k - number of ensemble used.
+ // k is in {-1,0,1,..,<count of output classes-1>}.
+ // in the case of classification problem
+ // <count of output classes-1> ensembles are built.
+ // If k = -1 ordinary prediction is the result,
+ // otherwise function gives the prediction of the
+ // k-th ensemble only.
+ // OUTPUT
+ // RESULT
+ // Predicted value.
+ */
+ virtual float predict_serial( const CvMat* sample, const CvMat* missing=0,
+ CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
+ int k=-1 ) const;
+
+ /*
+ // Response value prediction.
+ // Parallel version (in the case of TBB existence)
+ //
+ // API
+ // virtual float predict( const CvMat* sample, const CvMat* missing=0,
+ CvMat* weak_responses=0, CvSlice slice = CV_WHOLE_SEQ,
+ int k=-1 ) const;
+
+ // INPUT
+ // sample - input sample of the same type as in the training set.
+ // missing - missing values mask. missing=0 if there are no
+ // missing values in sample vector.
+ // weak_responses - predictions of all of the trees.
+ // not implemented (!)
+ // slice - part of the ensemble used for prediction.
+ // slice = CV_WHOLE_SEQ when all trees are used.
+ // k - number of ensemble used.
+ // k is in {-1,0,1,..,<count of output classes-1>}.
+ // in the case of classification problem
+ // <count of output classes-1> ensembles are built.
+ // If k = -1 ordinary prediction is the result,
+ // otherwise function gives the prediction of the
+ // k-th ensemble only.
+ // OUTPUT
+ // RESULT
+ // Predicted value.
+ */
+ virtual float predict( const CvMat* sample, const CvMat* missing=0,
+ CvMat* weakResponses=0, CvSlice slice = CV_WHOLE_SEQ,
+ int k=-1 ) const;
+
+ /*
+ // Deletes all the data.
+ //
+ // API
+ // virtual void clear();
+
+ // INPUT
+ // OUTPUT
+ // delete data, weak, orig_response, sum_response,
+ // weak_eval, subsample_train, subsample_test,
+// sample_idx, missing, class_labels
+ // delta = 0.0
+ // RESULT
+ */
+ CV_WRAP virtual void clear();
+
+ /*
+ // Compute error on the train/test set.
+ //
+ // API
+ // virtual float calc_error( CvMLData* _data, int type,
+ // std::vector<float> *resp = 0 );
+ //
+ // INPUT
+ // data - dataset
+ // type - defines which error is to compute: train (CV_TRAIN_ERROR) or
+ // test (CV_TEST_ERROR).
+ // OUTPUT
+// resp - vector of predictions
+ // RESULT
+ // Error value.
+ */
+ virtual float calc_error( CvMLData* _data, int type,
+ std::vector<float> *resp = 0 );
+
+ /*
+ //
+ // Write parameters of the gtb model and data. Write learned model.
+ //
+ // API
+ // virtual void write( CvFileStorage* fs, const char* name ) const;
+ //
+ // INPUT
+ // fs - file storage to read parameters from.
+ // name - model name.
+ // OUTPUT
+ // RESULT
+ */
+ virtual void write( CvFileStorage* fs, const char* name ) const;
+
+
+ /*
+ //
+ // Read parameters of the gtb model and data. Read learned model.
+ //
+ // API
+ // virtual void read( CvFileStorage* fs, CvFileNode* node );
+ //
+ // INPUT
+ // fs - file storage to read parameters from.
+ // node - file node.
+ // OUTPUT
+ // RESULT
+ */
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+
+
+ // new-style C++ interface
+ CV_WRAP CvGBTrees( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvGBTreesParams params=CvGBTreesParams() );
+
+ CV_WRAP virtual bool train( const cv::Mat& trainData, int tflag,
+ const cv::Mat& responses, const cv::Mat& varIdx=cv::Mat(),
+ const cv::Mat& sampleIdx=cv::Mat(), const cv::Mat& varType=cv::Mat(),
+ const cv::Mat& missingDataMask=cv::Mat(),
+ CvGBTreesParams params=CvGBTreesParams(),
+ bool update=false );
+
+ CV_WRAP virtual float predict( const cv::Mat& sample, const cv::Mat& missing=cv::Mat(),
+ const cv::Range& slice = cv::Range::all(),
+ int k=-1 ) const;
+
+protected:
+
+ /*
+ // Compute the gradient vector components.
+ //
+ // API
+ // virtual void find_gradient( const int k = 0);
+
+ // INPUT
+ // k - used for classification problem, determining current
+ // tree ensemble.
+ // OUTPUT
+ // changes components of data->responses
+ // which correspond to samples used for training
+ // on the current step.
+ // RESULT
+ */
+ virtual void find_gradient( const int k = 0);
+
+
+ /*
+ //
+ // Change values in tree leaves according to the used loss function.
+ //
+ // API
+ // virtual void change_values(CvDTree* tree, const int k = 0);
+ //
+ // INPUT
+ // tree - decision tree to change.
+ // k - used for classification problem, determining current
+ // tree ensemble.
+ // OUTPUT
+ // changes 'value' fields of the trees' leaves.
+ // changes sum_response_tmp.
+ // RESULT
+ */
+ virtual void change_values(CvDTree* tree, const int k = 0);
+
+
+ /*
+ //
+ // Find optimal constant prediction value according to the used loss
+ // function.
+ // The goal is to find a constant that gives the minimal total loss
+ // on the _Idx samples.
+ //
+ // API
+ // virtual float find_optimal_value( const CvMat* _Idx );
+ //
+ // INPUT
+ // _Idx - indices of the samples from the training set.
+ // OUTPUT
+ // RESULT
+ // optimal constant value.
+ */
+ virtual float find_optimal_value( const CvMat* _Idx );
+
+
+ /*
+ //
+ // Randomly splits the whole training set into two parts according
+ // to params.portion.
+ //
+ // API
+ // virtual void do_subsample();
+ //
+ // INPUT
+ // OUTPUT
+ // subsample_train - indices of samples used for training
+ // subsample_test - indices of samples used for test
+ // RESULT
+ */
+ virtual void do_subsample();
+
+
+ /*
+ //
+ // Internal recursive function collecting the leaves of a subtree.
+ //
+ // API
+ // void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
+ //
+ // INPUT
+ // node - current node (root of the subtree).
+ // OUTPUT
+ // count - count of leaves in the subtree.
+ // leaves - array of pointers to leaves.
+ // RESULT
+ */
+ void leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node );
+
+
+ /*
+ //
+ // Get leaves of the tree.
+ //
+ // API
+ // CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
+ //
+ // INPUT
+ // dtree - decision tree.
+ // OUTPUT
+ // len - count of the leaves.
+ // RESULT
+ // CvDTreeNode** - array of pointers to leaves.
+ */
+ CvDTreeNode** GetLeaves( const CvDTree* dtree, int& len );
+
+
+ /*
+ //
+ // Reports whether the problem is regression or classification.
+ //
+ // API
+ // bool problem_type();
+ //
+ // INPUT
+ // OUTPUT
+ // RESULT
+ // false if it is a classification problem,
+ // true - if regression.
+ */
+ virtual bool problem_type() const;
+
+
+ /*
+ //
+ // Write parameters of the gtb model.
+ //
+ // API
+ // virtual void write_params( CvFileStorage* fs ) const;
+ //
+ // INPUT
+ // fs - file storage to write parameters to.
+ // OUTPUT
+ // RESULT
+ */
+ virtual void write_params( CvFileStorage* fs ) const;
+
+
+ /*
+ //
+ // Read parameters of the gtb model and data.
+ //
+ // API
+ // virtual void read_params( CvFileStorage* fs );
+ //
+ // INPUT
+ // fs - file storage to read parameters from.
+ // OUTPUT
+ // params - parameters of the gtb model.
+ // data - contains information about the structure
+ // of the data set (count of variables,
+ // their types, etc.).
+ // class_labels - output class labels map.
+ // RESULT
+ */
+ virtual void read_params( CvFileStorage* fs, CvFileNode* fnode );
+ int get_len(const CvMat* mat) const;
+
+
+ CvDTreeTrainData* data;
+ CvGBTreesParams params;
+
+ CvSeq** weak;
+ CvMat* orig_response;
+ CvMat* sum_response;
+ CvMat* sum_response_tmp;
+ CvMat* sample_idx;
+ CvMat* subsample_train;
+ CvMat* subsample_test;
+ CvMat* missing;
+ CvMat* class_labels;
+
+ cv::RNG* rng;
+
+ int class_count;
+ float delta;
+ float base_value;
+
+};
+
+
+
+/****************************************************************************************\
+* Artificial Neural Networks (ANN) *
+\****************************************************************************************/
+
+/////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
+
+struct CV_EXPORTS_W_MAP CvANN_MLP_TrainParams
+{
+ CvANN_MLP_TrainParams();
+ CvANN_MLP_TrainParams( CvTermCriteria term_crit, int train_method,
+ double param1, double param2=0 );
+ ~CvANN_MLP_TrainParams();
+
+ enum { BACKPROP=0, RPROP=1 };
+
+ CV_PROP_RW CvTermCriteria term_crit;
+ CV_PROP_RW int train_method;
+
+ // backpropagation parameters
+ CV_PROP_RW double bp_dw_scale, bp_moment_scale;
+
+ // rprop parameters
+ CV_PROP_RW double rp_dw0, rp_dw_plus, rp_dw_minus, rp_dw_min, rp_dw_max;
+};
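
For illustration, a sketch configuring RPROP training with the struct above (the numeric values are arbitrary examples):

    CvANN_MLP_TrainParams params;
    params.train_method = CvANN_MLP_TrainParams::RPROP;
    params.rp_dw0 = 0.1;    // initial update-value
    params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01);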
+
+
+class CV_EXPORTS_W CvANN_MLP : public CvStatModel
+{
+public:
+ CV_WRAP CvANN_MLP();
+ CvANN_MLP( const CvMat* layerSizes,
+ int activateFunc=CvANN_MLP::SIGMOID_SYM,
+ double fparam1=0, double fparam2=0 );
+
+ virtual ~CvANN_MLP();
+
+ virtual void create( const CvMat* layerSizes,
+ int activateFunc=CvANN_MLP::SIGMOID_SYM,
+ double fparam1=0, double fparam2=0 );
+
+ virtual int train( const CvMat* inputs, const CvMat* outputs,
+ const CvMat* sampleWeights, const CvMat* sampleIdx=0,
+ CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
+ int flags=0 );
+ virtual float predict( const CvMat* inputs, CV_OUT CvMat* outputs ) const;
+
+ CV_WRAP CvANN_MLP( const cv::Mat& layerSizes,
+ int activateFunc=CvANN_MLP::SIGMOID_SYM,
+ double fparam1=0, double fparam2=0 );
+
+ CV_WRAP virtual void create( const cv::Mat& layerSizes,
+ int activateFunc=CvANN_MLP::SIGMOID_SYM,
+ double fparam1=0, double fparam2=0 );
+
+ CV_WRAP virtual int train( const cv::Mat& inputs, const cv::Mat& outputs,
+ const cv::Mat& sampleWeights, const cv::Mat& sampleIdx=cv::Mat(),
+ CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams(),
+ int flags=0 );
+
+ CV_WRAP virtual float predict( const cv::Mat& inputs, CV_OUT cv::Mat& outputs ) const;
+
+ CV_WRAP virtual void clear();
+
+ // possible activation functions
+ enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 };
+
+ // available training flags
+ enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 };
+
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void write( CvFileStorage* storage, const char* name ) const;
+
+ int get_layer_count() { return layer_sizes ? layer_sizes->cols : 0; }
+ const CvMat* get_layer_sizes() { return layer_sizes; }
+ double* get_weights(int layer)
+ {
+ return layer_sizes && weights &&
+ (unsigned)layer <= (unsigned)layer_sizes->cols ? weights[layer] : 0;
+ }
+
+ virtual void calc_activ_func_deriv( CvMat* xf, CvMat* deriv, const double* bias ) const;
+
+protected:
+
+ virtual bool prepare_to_train( const CvMat* _inputs, const CvMat* _outputs,
+ const CvMat* _sample_weights, const CvMat* sampleIdx,
+ CvVectors* _ivecs, CvVectors* _ovecs, double** _sw, int _flags );
+
+ // sequential random backpropagation
+ virtual int train_backprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );
+
+ // RPROP algorithm
+ virtual int train_rprop( CvVectors _ivecs, CvVectors _ovecs, const double* _sw );
+
+ virtual void calc_activ_func( CvMat* xf, const double* bias ) const;
+ virtual void set_activ_func( int _activ_func=SIGMOID_SYM,
+ double _f_param1=0, double _f_param2=0 );
+ virtual void init_weights();
+ virtual void scale_input( const CvMat* _src, CvMat* _dst ) const;
+ virtual void scale_output( const CvMat* _src, CvMat* _dst ) const;
+ virtual void calc_input_scale( const CvVectors* vecs, int flags );
+ virtual void calc_output_scale( const CvVectors* vecs, int flags );
+
+ virtual void write_params( CvFileStorage* fs ) const;
+ virtual void read_params( CvFileStorage* fs, CvFileNode* node );
+
+ CvMat* layer_sizes;
+ CvMat* wbuf;
+ CvMat* sample_weights;
+ double** weights;
+ double f_param1, f_param2;
+ double min_val, max_val, min_val1, max_val1;
+ int activ_func;
+ int max_count, max_buf_sz;
+ CvANN_MLP_TrainParams params;
+ cv::RNG* rng;
+};
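
A minimal train/predict sketch using the new-style cv::Mat overloads above; layer sizes and data are illustrative, and an empty sampleWeights matrix is assumed to mean uniform weights:

    cv::Mat layers = (cv::Mat_<int>(1, 3) << 2, 4, 1);    // 2 inputs, 4 hidden, 1 output
    CvANN_MLP mlp;
    mlp.create(layers, CvANN_MLP::SIGMOID_SYM, 1, 1);

    cv::Mat inputs(8, 2, CV_32F), outputs(8, 1, CV_32F);  // training set, filled elsewhere
    mlp.train(inputs, outputs, cv::Mat());                // empty weights -> assumed uniform

    cv::Mat response;
    mlp.predict(inputs.row(0), response);                 // response receives the network output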
+
+/****************************************************************************************\
+* Auxiliary function declarations *
+\****************************************************************************************/
+
+/* Generates <sample> from a multivariate normal distribution, where <mean> is the
+ mean row vector and <cov> is the symmetric covariance matrix */
+CVAPI(void) cvRandMVNormal( CvMat* mean, CvMat* cov, CvMat* sample,
+ CvRNG* rng CV_DEFAULT(0) );
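
A sketch of drawing samples with the declaration above (sizes are illustrative; <sample> is assumed to receive one sample per row):

    CvMat* mean   = cvCreateMat(1, 2, CV_32FC1);
    CvMat* cov    = cvCreateMat(2, 2, CV_32FC1);
    CvMat* sample = cvCreateMat(100, 2, CV_32FC1);
    cvZero(mean);
    cvSetIdentity(cov);                   // unit covariance
    cvRandMVNormal(mean, cov, sample);    // 100 draws from N(mean, cov)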
+
+/* Generates a sample from a Gaussian mixture distribution */
+CVAPI(void) cvRandGaussMixture( CvMat* means[],
+ CvMat* covs[],
+ float weights[],
+ int clsnum,
+ CvMat* sample,
+ CvMat* sampClasses CV_DEFAULT(0) );
+
+#define CV_TS_CONCENTRIC_SPHERES 0
+
+/* creates test set */
+CVAPI(void) cvCreateTestSet( int type, CvMat** samples,
+ int num_samples,
+ int num_features,
+ CvMat** responses,
+ int num_classes, ... );
+
+/****************************************************************************************\
+* Data *
+\****************************************************************************************/
+
+#define CV_COUNT 0
+#define CV_PORTION 1
+
+struct CV_EXPORTS CvTrainTestSplit
+{
+ CvTrainTestSplit();
+ CvTrainTestSplit( int train_sample_count, bool mix = true);
+ CvTrainTestSplit( float train_sample_portion, bool mix = true);
+
+ union
+ {
+ int count;
+ float portion;
+ } train_sample_part;
+ int train_sample_part_mode;
+
+ bool mix;
+};
+
+class CV_EXPORTS CvMLData
+{
+public:
+ CvMLData();
+ virtual ~CvMLData();
+
+ // returns:
+ // 0 - OK
+ // -1 - file cannot be opened or is malformed
+ int read_csv( const char* filename );
+
+ const CvMat* get_values() const;
+ const CvMat* get_responses();
+ const CvMat* get_missing() const;
+
+ void set_header_lines_number( int n );
+ int get_header_lines_number() const;
+
+ void set_response_idx( int idx ); // the old response becomes a predictor; new response_idx = idx
+ // if idx < 0 there will be no response variable
+ int get_response_idx() const;
+
+ void set_train_test_split( const CvTrainTestSplit * spl );
+ const CvMat* get_train_sample_idx() const;
+ const CvMat* get_test_sample_idx() const;
+ void mix_train_and_test_idx();
+
+ const CvMat* get_var_idx();
+ void chahge_var_idx( int vi, bool state ); // misspelled (kept for backward compatibility);
+ // use change_var_idx instead
+ void change_var_idx( int vi, bool state ); // state == true sets the vi-th variable as a predictor
+
+ const CvMat* get_var_types();
+ int get_var_type( int var_idx ) const;
+ // the following two methods allow changing a variable's type;
+ // use them to assign the CV_VAR_CATEGORICAL type to a categorical variable
+ // with numerical labels; in all other cases variable types are determined automatically
+ void set_var_types( const char* str ); // str examples:
+ // "ord[0-17],cat[18]", "ord[0,2,4,10-12], cat[1,3,5-9,13,14]",
+ // "cat", "ord" (all vars are categorical/ordered)
+ void change_var_type( int var_idx, int type); // type in { CV_VAR_ORDERED, CV_VAR_CATEGORICAL }
+
+ void set_delimiter( char ch );
+ char get_delimiter() const;
+
+ void set_miss_ch( char ch );
+ char get_miss_ch() const;
+
+ const std::map<std::string, int>& get_class_labels_map() const;
+
+protected:
+ virtual void clear();
+
+ void str_to_flt_elem( const char* token, float& flt_elem, int& type);
+ void free_train_test_idx();
+
+ char delimiter;
+ char miss_ch;
+ //char flt_separator;
+
+ CvMat* values;
+ CvMat* missing;
+ CvMat* var_types;
+ CvMat* var_idx_mask;
+
+ CvMat* response_out; // header
+ CvMat* var_idx_out; // mat
+ CvMat* var_types_out; // mat
+
+ int header_lines_number;
+
+ int response_idx;
+
+ int train_sample_count;
+ bool mix;
+
+ int total_class_count;
+ std::map<std::string, int> class_map;
+
+ CvMat* train_sample_idx;
+ CvMat* test_sample_idx;
+ int* sample_idx; // data of train_sample_idx and test_sample_idx
+
+ cv::RNG* rng;
+};
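
Putting CvMLData and CvTrainTestSplit together, a hypothetical loading sketch ("dataset.csv" and the 80/20 split are illustrative):

    CvMLData data;
    if( data.read_csv("dataset.csv") == 0 )    // 0 means the file was parsed successfully
    {
        data.set_response_idx(0);              // column 0 becomes the response
        CvTrainTestSplit split(0.8f);          // 80% of the samples for training, mixed
        data.set_train_test_split(&split);
        const CvMat* train_idx = data.get_train_sample_idx();
        const CvMat* test_idx  = data.get_test_sample_idx();
    }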
+
+
+namespace cv
+{
+
+typedef CvStatModel StatModel;
+typedef CvParamGrid ParamGrid;
+typedef CvNormalBayesClassifier NormalBayesClassifier;
+typedef CvKNearest KNearest;
+typedef CvSVMParams SVMParams;
+typedef CvSVMKernel SVMKernel;
+typedef CvSVMSolver SVMSolver;
+typedef CvSVM SVM;
+typedef CvDTreeParams DTreeParams;
+typedef CvMLData TrainData;
+typedef CvDTree DecisionTree;
+typedef CvForestTree ForestTree;
+typedef CvRTParams RandomTreeParams;
+typedef CvRTrees RandomTrees;
+typedef CvERTreeTrainData ERTreeTRainData;
+typedef CvForestERTree ERTree;
+typedef CvERTrees ERTrees;
+typedef CvBoostParams BoostParams;
+typedef CvBoostTree BoostTree;
+typedef CvBoost Boost;
+typedef CvANN_MLP_TrainParams ANN_MLP_TrainParams;
+typedef CvANN_MLP NeuralNet_MLP;
+typedef CvGBTreesParams GradientBoostingTreeParams;
+typedef CvGBTrees GradientBoostingTrees;
+
+template<> CV_EXPORTS void Ptr<CvDTreeSplit>::delete_obj();
+
+CV_EXPORTS bool initModule_ml(void);
+}
+
+#endif // __cplusplus
+#endif // __OPENCV_ML_HPP__
+
+/* End of file. */
--- /dev/null
-#include "opencv2/gpu/gpu.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_NONFREE_GPU_HPP__
+ #define __OPENCV_NONFREE_GPU_HPP__
+
+ #include "opencv2/opencv_modules.hpp"
+
+ #if defined(HAVE_OPENCV_GPU)
+
++#include "opencv2/gpu.hpp"
+
+ namespace cv { namespace gpu {
+
+ class CV_EXPORTS SURF_GPU
+ {
+ public:
+ enum KeypointLayout
+ {
+ X_ROW = 0,
+ Y_ROW,
+ LAPLACIAN_ROW,
+ OCTAVE_ROW,
+ SIZE_ROW,
+ ANGLE_ROW,
+ HESSIAN_ROW,
+ ROWS_COUNT
+ };
+
+ //! the default constructor
+ SURF_GPU();
+ //! the full constructor taking all the necessary parameters
+ explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
+ int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
+
+ //! returns the descriptor size in float's (64 or 128)
+ int descriptorSize() const;
+
+ //! upload host keypoints to device memory
+ void uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
+ //! download keypoints from device to host memory
+ void downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints);
+
+ //! download descriptors from device to host memory
+ void downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors);
+
+ //! finds the keypoints using fast hessian detector used in SURF
+ //! supports CV_8UC1 images
+ //! keypoints will have nFeature cols and ROWS_COUNT (7) rows
+ //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
+ //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
+ //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
+ //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
+ //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
+ //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
+ //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
+ void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints);
+ //! finds the keypoints and computes their descriptors.
+ //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
+ void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
+ bool useProvidedKeypoints = false);
+
+ void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
+ void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
+ bool useProvidedKeypoints = false);
+
+ void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
+ bool useProvidedKeypoints = false);
+
+ void releaseMemory();
+
+ // SURF parameters
+ double hessianThreshold;
+ int nOctaves;
+ int nOctaveLayers;
+ bool extended;
+ bool upright;
+
+ //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
+ float keypointsRatio;
+
+ GpuMat sum, mask1, maskSum, intBuffer;
+
+ GpuMat det, trace;
+
+ GpuMat maxPosBuffer;
+ };
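
A usage sketch for the class above, assuming a CUDA-capable device and an 8-bit grayscale input ("image.png" is a placeholder):

    cv::Mat img = cv::imread("image.png", CV_LOAD_IMAGE_GRAYSCALE);
    cv::gpu::GpuMat d_img(img), d_keypoints, d_descriptors;

    cv::gpu::SURF_GPU surf(400.0);    // hessianThreshold = 400
    surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);

    std::vector<cv::KeyPoint> keypoints;
    surf.downloadKeypoints(d_keypoints, keypoints);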
+
+ /*!
+ * The class implements the following algorithm:
+ * "ViBe: A universal background subtraction algorithm for video sequences"
+ * O. Barnich and M. Van Droogenbroeck
+ * IEEE Transactions on Image Processing, 20(6):1709-1724, June 2011
+ */
+ class CV_EXPORTS VIBE_GPU
+ {
+ public:
+ //! the default constructor
+ explicit VIBE_GPU(unsigned long rngSeed = 1234567);
+
+ //! re-initialization method
+ void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
+
+ //! the update operator
+ void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
+
+ //! releases all inner buffers
+ void release();
+
+ int nbSamples; // number of samples per pixel
+ int reqMatches; // #_min
+ int radius; // R
+ int subsamplingFactor; // amount of random subsampling
+
+ private:
+ Size frameSize_;
+
+ unsigned long rngSeed_;
+ GpuMat randStates_;
+
+ GpuMat samples_;
+ };
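
A per-frame usage sketch for VIBE_GPU; the frames are assumed to come from some capture source:

    cv::Mat firstFrame, nextFrame;      // filled from a capture source
    cv::gpu::GpuMat d_frame(firstFrame), d_fgmask;
    cv::gpu::VIBE_GPU vibe;
    vibe.initialize(d_frame);           // build the model from the first frame
    for (;;)
    {
        // ... grab nextFrame from the source ...
        d_frame.upload(nextFrame);
        vibe(d_frame, d_fgmask);        // d_fgmask receives the foreground mask
    }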
+
+ } // namespace gpu
+
+ } // namespace cv
+
+ #endif // defined(HAVE_OPENCV_GPU)
+
+ #endif // __OPENCV_NONFREE_GPU_HPP__
--- /dev/null
-#include "opencv2/ocl/ocl.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_NONFREE_OCL_HPP__
+ #define __OPENCV_NONFREE_OCL_HPP__
+
- void uploadKeypoints(const vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl);
++#include "opencv2/ocl.hpp"
+
+ namespace cv
+ {
+ namespace ocl
+ {
+ //! Speeded up robust features, ported from the GPU module.
+ ////////////////////////////////// SURF //////////////////////////////////////////
+
+ class CV_EXPORTS SURF_OCL
+ {
+ public:
+ enum KeypointLayout
+ {
+ X_ROW = 0,
+ Y_ROW,
+ LAPLACIAN_ROW,
+ OCTAVE_ROW,
+ SIZE_ROW,
+ ANGLE_ROW,
+ HESSIAN_ROW,
+ ROWS_COUNT
+ };
+
+ //! the default constructor
+ SURF_OCL();
+ //! the full constructor taking all the necessary parameters
+ explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4,
+ int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false);
+
+ //! returns the descriptor size in float's (64 or 128)
+ int descriptorSize() const;
+ //! upload host keypoints to device memory
- void downloadKeypoints(const oclMat &keypointsocl, vector<KeyPoint> &keypoints);
++ void uploadKeypoints(const std::vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl);
+ //! download keypoints from device to host memory
- void downloadDescriptors(const oclMat &descriptorsocl, vector<float> &descriptors);
++ void downloadKeypoints(const oclMat &keypointsocl, std::vector<KeyPoint> &keypoints);
+ //! download descriptors from device to host memory
++ void downloadDescriptors(const oclMat &descriptorsocl, std::vector<float> &descriptors);
+ //! finds the keypoints using fast hessian detector used in SURF
+ //! supports CV_8UC1 images
+ //! keypoints will have nFeature cols and ROWS_COUNT (7) rows
+ //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
+ //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
+ //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
+ //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
+ //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
+ //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
+ //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
+ void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints);
+ //! finds the keypoints and computes their descriptors.
+ //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
+ void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
+ bool useProvidedKeypoints = false);
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints);
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, oclMat &descriptors,
+ bool useProvidedKeypoints = false);
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, std::vector<float> &descriptors,
+ bool useProvidedKeypoints = false);
+
+ void releaseMemory();
+
+ // SURF parameters
+ float hessianThreshold;
+ int nOctaves;
+ int nOctaveLayers;
+ bool extended;
+ bool upright;
+ //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
+ float keypointsRatio;
+ oclMat sum, mask1, maskSum, intBuffer;
+ oclMat det, trace;
+ oclMat maxPosBuffer;
+
+ };
+ }
+ }
+
+ #endif //__OPENCV_NONFREE_OCL_HPP__
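
The OCL port mirrors the GPU interface; a minimal sketch, assuming an OpenCL device has been selected first ("image.png" is a placeholder):

    std::vector<cv::ocl::Info> platforms;
    cv::ocl::getDevice(platforms);    // pick a default OpenCL device

    cv::ocl::oclMat d_img(cv::imread("image.png", CV_LOAD_IMAGE_GRAYSCALE));
    cv::ocl::oclMat d_keypoints, d_descriptors;

    cv::ocl::SURF_OCL surf(400.0);
    surf(d_img, cv::ocl::oclMat(), d_keypoints, d_descriptors);

    std::vector<cv::KeyPoint> keypoints;
    surf.downloadKeypoints(d_keypoints, keypoints);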
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
-#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ts.hpp"
+#include "opencv2/nonfree.hpp"
+#include "opencv2/highgui.hpp"
+ #include "opencv2/opencv_modules.hpp"
+ #ifdef HAVE_OPENCV_OCL
+ # include "opencv2/nonfree/ocl.hpp"
+ #endif
+
+ #if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
+ #include "opencv2/nonfree/gpu.hpp"
+ #endif
+
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#include "cvconfig.h"
#endif
-#include "opencv2/opencv_modules.hpp"
+#include "opencv2/nonfree.hpp"
+#include "opencv2/imgproc.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/core/utility.hpp"
#include "opencv2/core/internal.hpp"
-
++#include "opencv2/opencv_modules.hpp"
++
+ #if defined(HAVE_OPENCV_GPU)
+ #include "opencv2/nonfree/gpu.hpp"
++
+ #if defined(HAVE_CUDA)
+ #include "opencv2/gpu/stream_accessor.hpp"
+ #include "opencv2/gpu/device/common.hpp"
+
+ static inline void throw_nogpu() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
+ #else
+ static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
+ #endif
+ #endif
+
+ #ifdef HAVE_OPENCV_OCL
+ # include "opencv2/nonfree/ocl.hpp"
+ # include "opencv2/ocl/private/util.hpp"
+ #endif
+
#endif
}
- static inline int divUp(int total, int grain)
-static inline int divUp(size_t total, size_t grain)
++static inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] =
{
- divUp(max_samples_j, localThreads[0]) * localThreads[0],
- divUp(max_samples_i, localThreads[1]) * localThreads[1] *(nOctaveLayers + 2),
+ divUp(max_samples_j, localThreads[0]) *localThreads[0],
+ divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
1
};
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
Context *clCxt = det.clCxt;
- string kernelName = useMask ? "icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers));
- args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
- args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates));
- args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold));
+ std::string kernelName = use_mask ? "icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer";
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxCounter.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&counterOffset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&nLayers));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxCandidates));
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold));
- if(useMask)
+ if(use_mask)
{
if(maskSumTex)
{
}
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
- oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
+ oclMat &keypoints, oclMat &counters_, int octave, int layer_rows, int max_features)
{
Context *clCxt = det.clCxt;
- string kernelName = "icvInterpolateKeypoint";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counters_.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
- args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&max_features));
+ std::string kernelName = "icvInterpolateKeypoint";
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
- args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters.data));
++ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters_.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows));
- args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxFeatures));
++ args.push_back( std::make_pair( sizeof(cl_int), (void *)&max_features));
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
{
Context *clCxt = counters.clCxt;
- string kernelName = "icvSetUpright";
+ std::string kernelName = "icvSetUpright";
- vector< pair<size_t, const void *> > args;
+ std::vector< std::pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&nFeatures));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures));
size_t localThreads[3] = {256, 1, 1};
- size_t globalThreads[3] = {nFeatures, 1, 1};
+ size_t globalThreads[3] = {saturate_cast<size_t>(nFeatures), 1, 1};
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
Context *clCxt = descriptors.clCxt;
- std::string kernelName = "";
- string kernelName;
- vector< pair<size_t, const void *> > args;
++ std::string kernelName;
+ std::vector< std::pair<size_t, const void *> > args;
size_t localThreads[3] = {1, 1, 1};
size_t globalThreads[3] = {1, 1, 1};
}
else
{
- args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data));
}
- args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors64";
globalThreads[1] = localThreads[1];
args.clear();
- args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
else
{
}
else
{
- args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data));
}
- args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
kernelName = "normalize_descriptors128";
globalThreads[1] = localThreads[1];
args.clear();
- args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
- openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
}
cv::gpu::SURF_GPU::SURF_GPU() { throw_nogpu(); }
cv::gpu::SURF_GPU::SURF_GPU(double, int, int, bool, float, bool) { throw_nogpu(); }
int cv::gpu::SURF_GPU::descriptorSize() const { throw_nogpu(); return 0;}
-void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat&, vector<KeyPoint>&) { throw_nogpu(); }
-void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat&, vector<float>&) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat&, std::vector<float>&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&) { throw_nogpu(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, GpuMat&, bool) { throw_nogpu(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&, bool) { throw_nogpu(); }
+void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, std::vector<float>&, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }
- #else /* !defined (HAVE_CUDA) */
+ #else // !defined (HAVE_CUDA)
namespace cv { namespace gpu { namespace device
{
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
+#include <iostream>
+#include "opencv2/ts.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/nonfree.hpp"
+ #include "opencv2/opencv_modules.hpp"
+ #ifdef HAVE_OPENCV_OCL
+ # include "opencv2/nonfree/ocl.hpp"
+ #endif
+
+ #if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA)
+ #include "opencv2/ts/gpu_test.hpp"
+ #include "opencv2/nonfree/gpu.hpp"
+ #endif
+
#endif
--- /dev/null
- class Context
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OCL_HPP__
+#define __OPENCV_OCL_HPP__
+
+#include <memory>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/features2d.hpp"
+
+namespace cv
+{
+ namespace ocl
+ {
+ enum
+ {
+ CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
+ CVCL_DEVICE_TYPE_CPU = (1 << 1),
+ CVCL_DEVICE_TYPE_GPU = (1 << 2),
+ CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4)
+ CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF
+ };
+
+ enum DevMemRW
+ {
+ DEVICE_MEM_R_W = 0,
+ DEVICE_MEM_R_ONLY,
+ DEVICE_MEM_W_ONLY
+ };
+
+ enum DevMemType
+ {
+ DEVICE_MEM_DEFAULT = 0,
+ DEVICE_MEM_AHP, //alloc host pointer
+ DEVICE_MEM_UHP, //use host pointer
+ DEVICE_MEM_CHP, //copy host pointer
+ DEVICE_MEM_PM //persistent memory
+ };
+
+ //Get the global device memory and read/write type
+ //return 1 if a unified memory system is supported, otherwise return 0
+ CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
+
+ //Set the global device memory and read/write type;
+ //all newly created oclMat objects will use this type
+ //return -1 if the target type is unsupported, otherwise return 0
+ CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
+
+ //this class contains ocl runtime information
+ class CV_EXPORTS Info
+ {
+ public:
+ struct Impl;
+ Impl *impl;
+
+ Info();
+ Info(const Info &m);
+ ~Info();
+ void release();
+ Info &operator = (const Info &m);
+ std::vector<std::string> DeviceName;
+ };
+ //////////////////////////////// Initialization & Info ////////////////////////
+ //this function may be removed in the future
+ //CV_EXPORTS cl_device_id getDevice();
+ //this function must be called before any other cv::ocl:: function; it initializes the ocl runtime
+ //each Info corresponds to an OpenCL platform
+ //each platform contains one or more devices, each with its own name
+ CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
+
+ //selects the device to use; optional, to be called after getDevice
+ //devnum is the index of the selected device in the DeviceName vector of Info
+ CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
+
+ //optional: to save the compiled OpenCL binary kernels to file, set the output path
+ CV_EXPORTS void setBinpath(const char *path);
+
+ //The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue
+ CV_EXPORTS void* getoclContext();
+
+ CV_EXPORTS void* getoclCommandQueue();
+
+ //this function enables the ocl module to use a customized cl_context and cl_command_queue
+ //getDevice still needs to be called before this function
+ CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
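
Typical initialization with the functions above (a sketch; device index 0 is an arbitrary choice, and getDevice is assumed to report the number of devices found):

    std::vector<cv::ocl::Info> platforms;
    int ndevices = cv::ocl::getDevice(platforms, cv::ocl::CVCL_DEVICE_TYPE_GPU);
    if (ndevices > 0)
        cv::ocl::setDevice(platforms[0], 0);    // first device of the first platform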
+
+ //////////////////////////////// Error handling ////////////////////////
+ CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
+
+ //////////////////////////////// OpenCL context ////////////////////////
+ //This is a global singleton class used to represent an OpenCL context.
- static std::auto_ptr<Context> clCxt;
++ class CV_EXPORTS Context
+ {
+ protected:
+ Context();
+ friend class std::auto_ptr<Context>;
- static int val;
+
++ private:
++ static std::auto_ptr<Context> clCxt;
++ static int val;
+ public:
+ ~Context();
- struct Impl;
- Impl *impl;
++ void release();
++ Info::Impl* impl;
++
+ static Context *getContext();
+ static void setContext(Info &oclinfo);
-
- //! Speeded up robust features, port from GPU module.
- ////////////////////////////////// SURF //////////////////////////////////////////
- class CV_EXPORTS SURF_OCL
- {
- public:
- enum KeypointLayout
- {
- X_ROW = 0,
- Y_ROW,
- LAPLACIAN_ROW,
- OCTAVE_ROW,
- SIZE_ROW,
- ANGLE_ROW,
- HESSIAN_ROW,
- ROWS_COUNT
- };
-
- //! the default constructor
- SURF_OCL();
- //! the full constructor taking all the necessary parameters
- explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4,
- int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false);
-
- //! returns the descriptor size in float's (64 or 128)
- int descriptorSize() const;
-
- //! upload host keypoints to device memory
- void uploadKeypoints(const std::vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl);
- //! download keypoints from device to host memory
- void downloadKeypoints(const oclMat &keypointsocl, std::vector<KeyPoint> &keypoints);
-
- //! download descriptors from device to host memory
- void downloadDescriptors(const oclMat &descriptorsocl, std::vector<float> &descriptors);
-
- //! finds the keypoints using fast hessian detector used in SURF
- //! supports CV_8UC1 images
- //! keypoints will have nFeature cols and 6 rows
- //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
- //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
- //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
- //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
- //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
- //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
- //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
- void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints);
- //! finds the keypoints and computes their descriptors.
- //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
- void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints);
- void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, oclMat &descriptors,
- bool useProvidedKeypoints = false);
-
- void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, std::vector<float> &descriptors,
- bool useProvidedKeypoints = false);
-
- void releaseMemory();
-
- // SURF parameters
- float hessianThreshold;
- int nOctaves;
- int nOctaveLayers;
- bool extended;
- bool upright;
-
- //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
- float keypointsRatio;
-
- oclMat sum, mask1, maskSum, intBuffer;
-
- oclMat det, trace;
-
- oclMat maxPosBuffer;
- };
++
++ enum {CL_DOUBLE, CL_UNIFIED_MEM};
++ bool supportsFeature(int ftype);
++ size_t computeUnits();
++ size_t maxWorkGroupSize();
++ void* oclContext();
++ void* oclCommandQueue();
+ };
+
+ //! Calls a kernel, by string. Pass globalThreads = NULL and cleanUp = true to clean up without executing.
+ CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ const char **source, std::string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args,
+ int channels, int depth, const char *build_options,
+ bool finish = true, bool measureKernelTime = false,
+ bool cleanUp = true);
+
+ //! Calls a kernel, by file. Pass globalThreads = NULL and cleanUp = true to clean up without executing.
+ CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ const char **fileName, const int numFiles, std::string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args,
+ int channels, int depth, const char *build_options,
+ bool finish = true, bool measureKernelTime = false,
+ bool cleanUp = true);
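
A hedged sketch of the interop call above: the kernel source and buffer are invented for illustration, and channels/depth are passed as -1 (no type suffixing), as in the module's own kernel launches:

    const char* source =
        "__kernel void fill(__global float* dst) { dst[get_global_id(0)] = 1.0f; }";
    cv::ocl::oclMat d_buf(1, 256, CV_32FC1);

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&d_buf.data ));

    size_t globalThreads[3] = {256, 1, 1};
    size_t localThreads[3]  = {16, 1, 1};
    cv::ocl::openCLExecuteKernelInterop(cv::ocl::Context::getContext(), &source, "fill",
                                        globalThreads, localThreads, args, -1, -1, "");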
+
+ class CV_EXPORTS oclMatExpr;
+ //////////////////////////////// oclMat ////////////////////////////////
+ class CV_EXPORTS oclMat
+ {
+ public:
+ //! default constructor
+ oclMat();
+ //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ oclMat(int rows, int cols, int type);
+ oclMat(Size size, int type);
+ //! constructs oclMatrix and fills it with the specified value s.
+ oclMat(int rows, int cols, int type, const Scalar &s);
+ oclMat(Size size, int type, const Scalar &s);
+ //! copy constructor
+ oclMat(const oclMat &m);
+
+ //! constructor for oclMatrix headers pointing to user-allocated data
+ oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+
+ //! creates a matrix header for a part of the bigger matrix
+ oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ oclMat(const oclMat &m, const Rect &roi);
+
+ //! builds oclMat from Mat. Performs a blocking upload to the device.
+ explicit oclMat (const Mat &m);
+
+ //! destructor - calls release()
+ ~oclMat();
+
+ //! assignment operators
+ oclMat &operator = (const oclMat &m);
+ //! assignment operator. Performs a blocking upload to the device.
+ oclMat &operator = (const Mat &m);
+ oclMat &operator = (const oclMatExpr& expr);
+
+ //! performs a blocking upload of data to the oclMat.
+ void upload(const cv::Mat &m);
+
+
+ //! downloads data from device to host memory. Blocking call.
+ operator Mat() const;
+ void download(cv::Mat &m) const;
+
+
+ //! returns a new oclMatrix header for the specified row
+ oclMat row(int y) const;
+ //! returns a new oclMatrix header for the specified column
+ oclMat col(int x) const;
+ //! ... for the specified row span
+ oclMat rowRange(int startrow, int endrow) const;
+ oclMat rowRange(const Range &r) const;
+ //! ... for the specified column span
+ oclMat colRange(int startcol, int endcol) const;
+ oclMat colRange(const Range &r) const;
+
+ //! returns deep copy of the oclMatrix, i.e. the data is copied
+ oclMat clone() const;
+ //! copies the oclMatrix content to "m".
+ // It calls m.create(this->size(), this->type()).
+ // It supports any data type
+ void copyTo( oclMat &m ) const;
+ //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ void copyTo( oclMat &m, const oclMat &mask ) const;
+ //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+
+ void assignTo( oclMat &m, int type = -1 ) const;
+
+ //! sets every oclMatrix element to s
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& operator = (const Scalar &s);
+ //! sets some of the oclMatrix elements to s, according to the mask
+ //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ //! creates alternative oclMatrix header for the same data, with different
+ // number of channels and/or different number of rows. see cvReshape.
+ oclMat reshape(int cn, int rows = 0) const;
+
+ //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ // previous data is unreferenced if needed.
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+
+ //! allocates new oclMatrix with specified device memory type.
+ void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
+ void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
+
+ //! decreases the reference counter;
+ // deallocates the data when the reference counter reaches 0.
+ void release();
+
+ //! swaps with other smart pointer
+ void swap(oclMat &mat);
+
+ //! locates oclMatrix header within a parent oclMatrix. See below
+ void locateROI( Size &wholeSize, Point &ofs ) const;
+ //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ //! extracts a rectangular sub-oclMatrix
+ // (this is a generalized form of row, rowRange etc.)
+ oclMat operator()( Range rowRange, Range colRange ) const;
+ oclMat operator()( const Rect &roi ) const;
+
+ oclMat& operator+=( const oclMat& m );
+ oclMat& operator-=( const oclMat& m );
+ oclMat& operator*=( const oclMat& m );
+ oclMat& operator/=( const oclMat& m );
+
+ //! returns true if the oclMatrix data is continuous
+ // (i.e. when there are no gaps between successive rows).
+ // similar to CV_IS_oclMat_CONT(cvoclMat->type)
+ bool isContinuous() const;
+ //! returns element size in bytes,
+ // similar to CV_ELEM_SIZE(cvMat->type)
+ size_t elemSize() const;
+ //! returns the size of element channel in bytes.
+ size_t elemSize1() const;
+ //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ int type() const;
+ //! returns element type; e.g. 8UC3 reports 8UC4 because in ocl
+ //! a 3-channel element actually occupies 4-channel space
+ int ocltype() const;
+ //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+ int depth() const;
+ //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
+ int channels() const;
+ //! returns the number of channels; returns 4 for 3-channel elements
+ //! because 3-channel elements actually occupy 4-channel space
+ int oclchannels() const;
+ //! returns step/elemSize1()
+ size_t step1() const;
+ //! returns oclMatrix size:
+ // width == number of columns, height == number of rows
+ Size size() const;
+ //! returns true if oclMatrix data is NULL
+ bool empty() const;
+
+ //! returns pointer to y-th row
+ uchar* ptr(int y = 0);
+ const uchar *ptr(int y = 0) const;
+
+ //! template version of the above method
+ template<typename _Tp> _Tp *ptr(int y = 0);
+ template<typename _Tp> const _Tp *ptr(int y = 0) const;
+
+ //! matrix transposition
+ oclMat t() const;
+
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+ //! the number of rows and columns
+ int rows, cols;
+ //! a distance between successive rows in bytes; includes the gap if any
+ size_t step;
+ //! pointer to the data(OCL memory object)
+ uchar *data;
+
+ //! pointer to the reference counter;
+ // when oclMatrix points to user-allocated data, the pointer is NULL
+ int *refcount;
+
+ //! helper fields used in locateROI and adjustROI
+ //datastart and dataend are not used in current version
+ uchar *datastart;
+ uchar *dataend;
+
+ //! OpenCL context associated with the oclMat object.
+ Context *clCxt;
+ //offset for ROI handling, calculated in bytes
+ int offset;
+ //wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
+ int wholerows;
+ int wholecols;
+ };
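
A round-trip sketch with the container above (sizes arbitrary): upload and download are blocking, while the ROI operator only creates a new header over the same data:

    cv::Mat h_src = cv::Mat::ones(480, 640, CV_8UC1), h_dst;
    cv::ocl::oclMat d_mat;
    d_mat.upload(h_src);                                      // host -> device
    cv::ocl::oclMat d_roi = d_mat(cv::Rect(0, 0, 320, 240));  // view, no copy
    d_roi.download(h_dst);                                    // device -> host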
+
+
+ ///////////////////// mat split and merge /////////////////////////////////
+ //! Compose a multi-channel array from several single-channel arrays
+ // Support all types
+ CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+
+ //! Divides multi-channel array into several single-channel arrays
+ // Support all types
+ CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+
+ ////////////////////////////// Arithmetics ///////////////////////////////////
+ //#if defined DOUBLE_SUPPORT
+ //typedef double F;
+ //#else
+ //typedef float F;
+ //#endif
+ // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c);
+ CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c);
+ //! adds one matrix to another (c = a + b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
+ //! adds one matrix to another (c = a + b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ //! adds scalar to a matrix (c = a + s)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ //! subtracts one matrix from another (c = a - b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
+ //! subtracts one matrix from another (c = a - b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ //! subtracts scalar from a matrix (c = a - s)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ //! subtracts a matrix from a scalar (c = s - a)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
+ //! computes element-wise product of the two arrays (c = a * b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ //! computes element-wise quotient of the two arrays (c = a / b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ //! computes element-wise quotient of a scalar and an array (c = scale / b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
+
+ //! compares elements of two arrays (c = a <cmpop> b)
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
+
+ //! transposes the matrix
+ // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1 (the same as the CUDA version)
+ CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+
+ //! computes element-wise absolute difference of two arrays (c = abs(a - b))
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
+ //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+ // supports all types except CV_8SC1, CV_8SC2, CV_8SC3 and CV_8SC4
+ CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
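+
+ // A minimal per-element arithmetic sketch (an illustration only; assumes same-size
+ // CV_32FC1 oclMat inputs a and b):
+ //
+ // oclMat c;
+ // cv::ocl::add(a, b, c); // c = a + b
+ // cv::ocl::multiply(a, b, c, 0.5); // c = 0.5 * a * b
+ // cv::ocl::compare(a, b, c, CMP_GT); // c = 255 where a > b, 0 elsewhere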
+
+ //! computes mean value and standard deviation of all or selected array elements
+ // supports all types except CV_32F and CV_64F
+ CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+
+ //! computes norm of array
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports only CV_8UC1 type
+ CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+
+ //! computes norm of the difference between two arrays
+ // supports NORM_INF, NORM_L1, NORM_L2
+ // supports only CV_8UC1 type
+ CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+
+ //! reverses the order of the rows, columns or both in a matrix
+ // supports all types
+ CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
+
+ //! computes sum of array elements
+ // disabled until the crash is fixed
+ // supports all types
+ CV_EXPORTS Scalar sum(const oclMat &m);
+ CV_EXPORTS Scalar absSum(const oclMat &m);
+ CV_EXPORTS Scalar sqrSum(const oclMat &m);
+
+ //! finds global minimum and maximum array elements and returns their values
+ // supports all C1 types
+ CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+
+ //! finds global minimum and maximum array elements and returns their values with locations
+ // supports all C1 types
+ CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ const oclMat &mask = oclMat());
+
+ //! counts non-zero array elements
+ // supports all types
+ CV_EXPORTS int countNonZero(const oclMat &src);
+
+ //! transforms 8-bit unsigned integers using a lookup table: dst(i) = lut(src(i))
+ // the destination array will have the same depth as lut and the same number of channels as the source
+ // supports 8UC1 and 8UC4 only
+ CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
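+
+ // A minimal LUT sketch (an illustration only; assumes an 8UC1 oclMat named src and
+ // that an oclMat can be constructed by uploading a host Mat):
+ //
+ // Mat table(1, 256, CV_8U);
+ // for (int i = 0; i < 256; i++) table.at<uchar>(i) = 255 - i; // invert
+ // oclMat d_lut(table), dst;
+ // cv::ocl::LUT(src, d_lut, dst);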
+
+ //! only 8UC1 and 256 bins are supported now
+ CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ //! only 8UC1 and 256 bins are supported now
+ CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+ //! applies a bilateral filter
+ // supports 8UC1 and 8UC4
+ CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+ //! computes exponent of each matrix element (b = e**a)
+ // supports only CV_32FC1 type
+ CV_EXPORTS void exp(const oclMat &a, oclMat &b);
+
+ //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+ // supports only CV_32FC1 type
+ CV_EXPORTS void log(const oclMat &a, oclMat &b);
+
+ //! computes magnitude of each (x(i), y(i)) vector
+ // supports only CV_32F and CV_64F types
+ CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
+
+ CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
+
+ //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ // supports only CV_32F and CV_64F types
+ CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+
+ //! the function raises every element of the input array to the power p
+ //! supports only CV_32F and CV_64F types
+ CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+
+ //! converts Cartesian coordinates to polar
+ // supports only CV_32F and CV_64F types
+ CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+
+ //! converts polar coordinates to Cartesian
+ // supports only CV_32F and CV_64F types
+ CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
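+
+ // A minimal Cartesian/polar round-trip sketch (an illustration only; assumes
+ // same-size CV_32FC1 oclMat inputs x and y):
+ //
+ // oclMat mag, ang;
+ // cv::ocl::cartToPolar(x, y, mag, ang, true); // angles in degrees
+ // cv::ocl::polarToCart(mag, ang, x, y, true); // back to Cartesian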
+
+ //! performs per-element bit-wise inversion
+ // supports all types
+ CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+ //! calculates per-element bit-wise disjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ //! calculates per-element bit-wise conjunction of two arrays
+ // supports all types
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ //! calculates per-element bit-wise "exclusive or" operation
+ // supports all types
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+
+ //! Logical operators
+ CV_EXPORTS oclMat operator ~ (const oclMat &);
+ CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
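+
+ // A minimal bit-wise sketch (an illustration only; assumes same-size, same-type
+ // oclMat inputs a and b):
+ //
+ // oclMat m = a & b; // per-element conjunction
+ // oclMat inv = ~a; // per-element inversion
+ // cv::ocl::bitwise_xor(a, b, m); // explicit form with optional mask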
+
+
+ //! Mathematics operators
+ CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+
+ //! computes convolution of two images
+ //! support only CV_32FC1 type
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
+
+ CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
+
+ //////////////////////////////// Filter Engine ////////////////////////////////
+
+ /*!
+ The Base Class for 1D or Row-wise Filters
+
+ This is the base class for linear or non-linear filters that process 1D data.
+ In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+ */
+ class CV_EXPORTS BaseRowFilter_GPU
+ {
+ public:
+ BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseRowFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Column-wise Filters
+
+ This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ Such filters are used for the "vertical" filtering parts in separable filters.
+ */
+ class CV_EXPORTS BaseColumnFilter_GPU
+ {
+ public:
+ BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseColumnFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
+ };
+
+ /*!
+ The Base Class for Non-Separable 2D Filters.
+
+ This is the base class for linear or non-linear 2D filters.
+ */
+ class CV_EXPORTS BaseFilter_GPU
+ {
+ public:
+ BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ virtual ~BaseFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ Size ksize;
+ Point anchor;
+ int borderType;
+ };
+
+ /*!
+ The Base Class for Filter Engine.
+
+ The class can be used to apply an arbitrary filtering operation to an image.
+ It contains all the necessary intermediate buffers.
+ */
+ class CV_EXPORTS FilterEngine_GPU
+ {
+ public:
+ virtual ~FilterEngine_GPU() {}
+
+ virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ };
+
+ //! returns the non-separable filter engine with the specified filter
+ CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+
+ //! returns the primitive row filter with the specified kernel
+ CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT);
+
+ //! returns the primitive column filter with the specified kernel
+ CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+
+ //! returns the separable linear filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
+ const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! returns the separable filter engine with the specified filters
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter);
+
+ //! returns the Gaussian filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! returns filter engine for the generalized Sobel operator
+ CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
+
+ //! applies Laplacian operator to the image
+ // supports only ksize = 1 and ksize = 3; 8UC1, 8UC4, 32FC1 and 32FC4 data types
+ CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
+
+ //! returns 2D box filter
+ // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+ CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns box filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns 2D filter with the specified kernel
+ // supports CV_8UC1 and CV_8UC4 types
+ CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns the non-separable linear filter engine
+ CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! smooths the image using the normalized box filter
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
+ CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! returns 2D morphological filter
+ //! only MORPH_ERODE and MORPH_DILATE are supported
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ // the kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
+ CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ Point anchor = Point(-1, -1));
+
+ //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ const Point &anchor = Point(-1, -1), int iterations = 1);
+
+ //! a synonym for normalized box filter
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ int borderType = BORDER_CONSTANT)
+ {
+ boxFilter(src, dst, -1, ksize, anchor, borderType);
+ }
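+
+ // A minimal smoothing sketch (an illustration only; assumes an 8UC1 oclMat named src).
+ // blur() simply forwards to boxFilter() with ddepth = -1, i.e. the source depth is kept:
+ //
+ // oclMat dst;
+ // cv::ocl::blur(src, dst, Size(5, 5));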
+
+ //! applies non-separable 2D linear filter to the image
+ CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+ //! applies separable 2D linear filter to the image
+ CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! applies generalized Sobel operator to the image
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! applies the vertical or horizontal Scharr operator to the image
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+
+ //! smooths the image using Gaussian filter.
+ // dst.type must equal src.type
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+
+ //! erodes the image (applies the local minimum operator)
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! dilates the image (applies the local maximum operator)
+ // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
+
+ //! applies an advanced morphological operation to the image
+ CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
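+
+ // A minimal morphology sketch (an illustration only; assumes an 8UC1 oclMat named src):
+ //
+ // Mat kernel = getStructuringElement(MORPH_RECT, Size(3, 3));
+ // oclMat dst;
+ // cv::ocl::erode(src, dst, kernel); // local minimum
+ // cv::ocl::morphologyEx(src, dst, MORPH_OPEN, kernel); // erode then dilate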
+
+
+ ////////////////////////////// Image processing //////////////////////////////
+ //! Does mean shift filtering on GPU.
+ CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift procedure on GPU.
+ CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! Does mean shift segmentation with elimination of small regions.
+ CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+
+ //! applies fixed threshold to the image.
+ // supports CV_8UC1 and CV_32FC1 data type
+ // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
+ CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+
+ //! resizes the image
+ // Supports INTER_NEAREST, INTER_LINEAR
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+
+ //! Applies a generic geometrical transformation to an image.
+ // Supports INTER_NEAREST, INTER_LINEAR.
+ // Map1 supports CV_16SC2, CV_32FC2 types.
+ // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+ CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+
+ //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+ // supports CV_8UC1, CV_8UC4, CV_32SC1 types
+ CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+
+ //! smooths the image using a median filter
+ // the source must be a 1- or 4-channel image; when m is 3 or 5, the image depth should be CV_8U or CV_32F
+ CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+
+ //! warps the image using affine transformation
+ // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+
+ //! warps the image using perspective transformation
+ // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
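+
+ // A minimal warp sketch (an illustration only; assumes an 8UC1 oclMat named src and
+ // a 2x3 CV_64F host Mat named M):
+ //
+ // oclMat dst;
+ // cv::ocl::warpAffine(src, dst, M, src.size(), INTER_LINEAR);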
+
+ //! computes the integral image and integral for the squared image
+ // sum will have CV_32S type, sqsum - CV_32F type
+ // supports only CV_8UC1 source type
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
+ CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+ CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ class CV_EXPORTS_W OclCascadeClassifier : public cv::CascadeClassifier
+ {
+ public:
+ OclCascadeClassifier() {}
+ ~OclCascadeClassifier() {}
+
+ CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
+ int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ };
+
+
+
+ /////////////////////////////// Pyramid /////////////////////////////////////
+ CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+
+ //! upsamples the source image and then smoothes it
+ CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+
+ //! performs linear blending of two images
+ //! to avoid accuracy errors the sum of weights shouldn't be very close to zero
+ // supports only CV_8UC1 source type
+ CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+
+ //! computes vertical sum, supports only CV_32FC1 images
+ CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+
+ ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ struct CV_EXPORTS MatchTemplateBuf
+ {
+ Size user_block_size;
+ oclMat imagef, templf;
+ std::vector<oclMat> images;
+ std::vector<oclMat> image_sums;
+ std::vector<oclMat> image_sqsums;
+ };
+
+
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
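+
+ // A minimal template-matching sketch (an illustration only; assumes 8UC1 oclMat
+ // inputs named image and templ):
+ //
+ // oclMat result;
+ // MatchTemplateBuf buf; // reuse across calls to avoid reallocations
+ // cv::ocl::matchTemplate(image, templ, result, TM_CCORR, buf);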
+
+
+
+ ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+
+ struct CV_EXPORTS CannyBuf;
+
+ //! computes edges of the input image using the Canny operator
+ // supports CV_8UC1 only
+ CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+ struct CV_EXPORTS CannyBuf
+ {
+ CannyBuf() : counter(NULL) {}
+ ~CannyBuf()
+ {
+ release();
+ }
+
+ explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
+ {
+ create(image_size, apperture_size);
+ }
+ CannyBuf(const oclMat &dx_, const oclMat &dy_);
+
+ void create(const Size &image_size, int apperture_size = 3);
+ void release();
+
+ oclMat dx, dy;
+ oclMat dx_buf, dy_buf;
+ oclMat edgeBuf;
+ oclMat trackBuf1, trackBuf2;
+ void *counter;
+ Ptr<FilterEngine_GPU> filterDX, filterDY;
+ };
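+
+ // A minimal Canny sketch (an illustration only; assumes an 8UC1 oclMat named img):
+ //
+ // oclMat edges;
+ // CannyBuf buf(img.size()); // reuse buffers across calls
+ // cv::ocl::Canny(img, buf, edges, 50.0, 150.0);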
+
+ ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ //! HoughCircles
+ struct HoughCirclesBuf
+ {
+ oclMat edges;
+ oclMat accum;
+ oclMat srcPoints;
+ oclMat centers;
+ CannyBuf cannyBuf;
+ };
+
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
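+
+ // A minimal HoughCircles sketch (an illustration only; assumes an 8UC1 oclMat named img):
+ //
+ // oclMat d_circles;
+ // cv::ocl::HoughCircles(img, d_circles, CV_HOUGH_GRADIENT, 1.0f, 20.0f, 100, 30, 5, 50);
+ // std::vector<Vec3f> circles;
+ // cv::ocl::HoughCirclesDownload(d_circles, circles);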
+
+
+ ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ //! Param dft_size is the size of DFT transform.
+ //!
+ //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ // supports src types CV_32FC1, CV_32FC2
+ // supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ // dft_size is the size of the original input, which is used for the complex-to-real transform
+ // each dimension of dft_size must be a product of powers of 2, 3 and 5
+ // the real-to-complex dft requires at least clAmdFft v1.8
+ // the real-to-complex dft output is not the same as the CPU version's
+ // real-to-complex and complex-to-real transforms do not support DFT_ROWS
+ CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
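+
+ // A minimal forward/inverse DFT sketch (an illustration only; assumes a CV_32FC1
+ // oclMat named src):
+ //
+ // oclMat freq, restored;
+ // cv::ocl::dft(src, freq, src.size()); // forward, complex output
+ // cv::ocl::dft(freq, restored, src.size(), DFT_INVERSE | DFT_REAL_OUTPUT);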
+
+ //! implements generalized matrix product algorithm GEMM from BLAS
+ // the functionality requires the clAmdBlas library
+ // supports only CV_32FC1 type
+ // the GEMM_3_T flag is not supported
+ CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ const oclMat &src3, double beta, oclMat &dst, int flags = 0);
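+
+ // A minimal GEMM sketch, dst = alpha*src1*src2 + beta*src3 (an illustration only;
+ // assumes CV_32FC1 oclMat inputs with compatible sizes):
+ //
+ // oclMat dst;
+ // cv::ocl::gemm(src1, src2, 1.0, src3, 1.0, dst);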
+
+ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
+ struct CV_EXPORTS HOGDescriptor
+ {
+ enum { DEFAULT_WIN_SIGMA = -1 };
+ enum { DEFAULT_NLEVELS = 64 };
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+ HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+ Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+ int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+ double threshold_L2hys = 0.2, bool gamma_correction = true,
+ int nlevels = DEFAULT_NLEVELS);
+
+ size_t getDescriptorSize() const;
+ size_t getBlockHistogramSize() const;
+
+ void setSVMDetector(const std::vector<float> &detector);
+
+ static std::vector<float> getDefaultPeopleDetector();
+ static std::vector<float> getPeopleDetector48x96();
+ static std::vector<float> getPeopleDetector64x128();
+
+ void detect(const oclMat &img, std::vector<Point> &found_locations,
+ double hit_threshold = 0, Size win_stride = Size(),
+ Size padding = Size());
+
+ void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+ double hit_threshold = 0, Size win_stride = Size(),
+ Size padding = Size(), double scale0 = 1.05,
+ int group_threshold = 2);
+
+ void getDescriptors(const oclMat &img, Size win_stride,
+ oclMat &descriptors,
+ int descr_format = DESCR_FORMAT_COL_BY_COL);
+
+ Size win_size;
+ Size block_size;
+ Size block_stride;
+ Size cell_size;
+ int nbins;
+ double win_sigma;
+ double threshold_L2hys;
+ bool gamma_correction;
+ int nlevels;
+
+ protected:
+ // initialize buffers; only needs to be done once in case of multiscale detection
+ void init_buffer(const oclMat &img, Size win_stride);
+
+ void computeBlockHistograms(const oclMat &img);
+ void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+
+ double getWinSigma() const;
+ bool checkDetectorSize() const;
+
+ static int numPartsWithin(int size, int part_size, int stride);
+ static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+ // coefficients of the separating plane
+ float free_coef;
+ oclMat detector;
+
+ // results of the last classification step
+ oclMat labels;
+ Mat labels_host;
+
+ // results of the last histogram evaluation step
+ oclMat block_hists;
+
+ // gradient computation results
+ oclMat grad, qangle;
+
+ // scaled image
+ oclMat image_scale;
+
+ // effective size of the input image (might differ from the original size after scaling)
+ Size effect_size;
+ };
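+
+ // A minimal people-detection sketch (an illustration only; assumes an 8UC1 oclMat named img):
+ //
+ // cv::ocl::HOGDescriptor hog;
+ // hog.setSVMDetector(cv::ocl::HOGDescriptor::getDefaultPeopleDetector());
+ // std::vector<Rect> found;
+ // hog.detectMultiScale(img, found);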
+
+
+ ////////////////////////feature2d_ocl/////////////////
+ /****************************************************************************************\
+ * Distance *
+ \****************************************************************************************/
+
+ template<typename T>
+ struct CV_EXPORTS Accumulator
+ {
+ typedef T Type;
+ };
+
+ template<> struct Accumulator<unsigned char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<unsigned short>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<short>
+ {
+ typedef float Type;
+ };
+
+ /*
+ * Manhattan distance (city block distance) functor
+ */
+ template<class T>
+ struct CV_EXPORTS L1
+ {
+ enum { normType = NORM_L1 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return normL1<ValueType, ResultType>(a, b, size);
+ }
+ };
+
+ /*
+ * Euclidean distance functor
+ */
+ template<class T>
+ struct CV_EXPORTS L2
+ {
+ enum { normType = NORM_L2 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+ }
+ };
+
+ /*
+ * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ * bit count of A exclusive-OR'ed with B
+ */
+ struct CV_EXPORTS Hamming
+ {
+ enum { normType = NORM_HAMMING };
+ typedef unsigned char ValueType;
+ typedef int ResultType;
+
+ /** this will count the bits in a ^ b
+ */
+ ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ {
+ return normHamming(a, b, size);
+ }
+ };
+
+ ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+
+ class CV_EXPORTS BruteForceMatcher_OCL_base
+ {
+ public:
+ enum DistType {L1Dist = 0, L2Dist, HammingDist};
+
+ explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+
+ // Add descriptors to the train descriptor collection
+ void add(const std::vector<oclMat> &descCollection);
+
+ // Get the train descriptor collection
+ const std::vector<oclMat> &getTrainDescriptors() const;
+
+ // Clear the train descriptor collection
+ void clear();
+
+ // Return true if there are no train descriptors in the collection
+ bool empty() const;
+
+ // Return true if the matcher supports masks in the match methods
+ bool isMaskSupported() const;
+
+ // Find one best match for each query descriptor
+ void matchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx and distance and convert them to a CPU vector of DMatch
+ static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ // Convert trainIdx and distance to a vector of DMatch
+ static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+ // Find one best match for each query descriptor
+ void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+
+ // Make a GPU collection of trains and masks in a format suitable for the matchCollection function
+ void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Find one best match from the train collection for each query descriptor
+ void matchCollection(const oclMat &query, const oclMat &trainCollection,
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ const oclMat &masks = oclMat());
+
+ // Download trainIdx, imgIdx and distance and convert them to a vector of DMatch
+ static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ // Convert trainIdx, imgIdx and distance to a vector of DMatch
+ static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+ // Find one best match from the train collection for each query descriptor.
+ void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Find k best matches for each query descriptor (in increasing order of distances)
+ void knnMatchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx and distance and convert them to a vector of DMatch.
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx and distance to a vector of DMatch
+ static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ void knnMatch(const oclMat &query, const oclMat &train,
+ std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
+ bool compactResult = false);
+
+ // Find k best matches from the train collection for each query descriptor (in increasing order of distances)
+ void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ const oclMat &maskCollection = oclMat());
+
+ // Download trainIdx and distance and convert them to a vector of DMatch.
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx and distance to a vector of DMatch
+ static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+ // Find the best matches for each query descriptor whose distance is less than maxDistance.
+ // nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
+ // note: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
+ // because it didn't have enough memory.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+ // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ // Matches are not sorted.
+ void radiusMatchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ const oclMat &mask = oclMat());
+
+ // Download trainIdx, nMatches and distance and convert them to a vector of DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to a vector of DMatch.
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find the best matches for each query descriptor whose distance is less than maxDistance
+ // (in increasing order of distances).
+ void radiusMatch(const oclMat &query, const oclMat &train,
+ std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ const oclMat &mask = oclMat(), bool compactResult = false);
+
+ // Find the best matches for each query descriptor whose distance is less than maxDistance.
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+ // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ // Matches are not sorted.
+ void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+ // Download trainIdx, imgIdx, nMatches and distance and convert them to a vector of DMatch.
+ // matches will be sorted in increasing order of distances.
+ // compactResult is used when the mask is not empty. If compactResult is false the matches
+ // vector will have the same size as queryDescriptors rows. If compactResult is true the
+ // matches vector will not contain matches for fully masked-out query descriptors.
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ // Convert trainIdx, nMatches and distance to a vector of DMatch.
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Find the best matches from the train collection for each query descriptor whose distance
+ // is less than maxDistance (in increasing order of distances).
+ void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+ DistType distType;
+
+ private:
+ std::vector<oclMat> trainDescCollection;
+ };
+
+
+
+ template <class Distance>
+ class CV_EXPORTS BruteForceMatcher_OCL;
+
+ template <typename T>
+ class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
+ explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
+ };
+
+ template <typename T>
+ class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
+ explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
+ };
+
+ template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
+ {
+ public:
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
+ explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
+ };
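+
+ // A minimal matching sketch (an illustration only; assumes CV_32F descriptor oclMat
+ // inputs named query and train):
+ //
+ // BruteForceMatcher_OCL< L2<float> > matcher;
+ // std::vector<DMatch> matches;
+ // matcher.match(query, train, matches);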
+
+
+
+ /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ class CV_EXPORTS PyrLKOpticalFlow
+ {
+ public:
+ PyrLKOpticalFlow()
+ {
+ winSize = Size(21, 21);
+ maxLevel = 3;
+ iters = 30;
+ derivLambda = 0.5;
+ useInitialFlow = false;
+ minEigThreshold = 1e-4f;
+ getMinEigenVals = false;
+ isDeviceArch11_ = false;
+ }
+
+ void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
+ oclMat &status, oclMat *err = 0);
+
+ void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
+
+ Size winSize;
+ int maxLevel;
+ int iters;
+ double derivLambda;
+ bool useInitialFlow;
+ float minEigThreshold;
+ bool getMinEigenVals;
+
+ void releaseMemory()
+ {
+ dx_calcBuf_.release();
+ dy_calcBuf_.release();
+
+ prevPyr_.clear();
+ nextPyr_.clear();
+
+ dx_buf_.release();
+ dy_buf_.release();
+ }
+
+ private:
+ void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+
+ void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+
+ oclMat dx_calcBuf_;
+ oclMat dy_calcBuf_;
+
+ std::vector<oclMat> prevPyr_;
+ std::vector<oclMat> nextPyr_;
+
+ oclMat dx_buf_;
+ oclMat dy_buf_;
+
+ oclMat uPyr_[2];
+ oclMat vPyr_[2];
+
+ bool isDeviceArch11_;
+ };
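+
+ // A minimal sparse-flow sketch (an illustration only; assumes 8UC1 oclMat frames
+ // named prev and next, and a 1 x N CV_32FC2 oclMat of points to track named prevPts):
+ //
+ // PyrLKOpticalFlow lk;
+ // oclMat nextPts, status;
+ // lk.sparse(prev, next, prevPts, nextPts, status);
+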
+ //////////////// build warping maps ////////////////////
+ //! builds plane warping maps
+ CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds cylindrical warping maps
+ CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds spherical warping maps
+ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ //! builds Affine warping maps
+ CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ //! builds Perspective warping maps
+ CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ //! Interpolate frames (images) using provided optical flow (displacement field).
+ //! frame0 - frame 0 (32-bit floating point images, single channel)
+ //! frame1 - frame 1 (the same type and size)
+ //! fu - forward horizontal displacement
+ //! fv - forward vertical displacement
+ //! bu - backward horizontal displacement
+ //! bv - backward vertical displacement
+ //! pos - new frame position
+ //! newFrame - new frame
+ //! buf - temporary buffer; will have size width x 6*height, CV_32FC1 type, and contain 6 oclMats:
+ //! occlusion masks 0, occlusion masks 1,
+ //! interpolated forward flow 0, interpolated forward flow 1,
+ //! interpolated backward flow 0, interpolated backward flow 1
+ //!
+ CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ const oclMat &fu, const oclMat &fv,
+ const oclMat &bu, const oclMat &bv,
+ float pos, oclMat &newFrame, oclMat &buf);
+
+ //! computes moments of the rasterized shape or a vector of points
+ CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
++
++ class CV_EXPORTS StereoBM_OCL
++ {
++ public:
++ enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
++
++ enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
++
++ //! the default constructor
++ StereoBM_OCL();
++ //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
++ StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
++
++ //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
++ //! Output disparity has CV_8U type.
++ void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
++
++ //! Some heuristics that tries to estimate
++ // whether the current GPU will be faster than the CPU for this algorithm.
++ // It queries the currently active device.
++ static bool checkIfGpuCallReasonable();
++
++ int preset;
++ int ndisp;
++ int winSize;
++
++ // If avergeTexThreshold == 0 => post-processing is disabled
++ // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x, y) where for the left image
++ // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
++ // i.e. the input left image is low-textured.
++ float avergeTexThreshold;
++ private:
++ oclMat minSSD, leBuf, riBuf;
++ };
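++
++ // A minimal block-matching sketch (an illustration only; assumes rectified 8UC1
++ // oclMat inputs named left and right):
++ //
++ // StereoBM_OCL bm(StereoBM_OCL::PREFILTER_XSOBEL, 64, 19);
++ // oclMat disparity; // CV_8U output
++ // bm(left, right, disparity);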
+ }
+}
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( push)
+# pragma warning( disable: 4267)
+#endif
+#include "opencv2/ocl/matrix_operations.hpp"
+#if defined _MSC_VER && _MSC_VER >= 1200
+# pragma warning( pop)
+#endif
+
+#endif /* __OPENCV_OCL_HPP__ */
// copy or use the software.
//
//
- // License Agreement
+ // License Agreement
// For Open Source Computer Vision Library
//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
--- /dev/null
-#include "opencv2/ocl/ocl.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ // Peng Xiao, pengxiao@multicorewareinc.com
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors as is and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_OCL_PRIVATE_UTIL__
+ #define __OPENCV_OCL_PRIVATE_UTIL__
+
- void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
++#include "opencv2/ocl.hpp"
+
+ #if defined __APPLE__
+ #include <OpenCL/OpenCL.h>
+ #else
+ #include <CL/opencl.h>
+ #endif
+
+ namespace cv
+ {
+ namespace ocl
+ {
+ enum openCLMemcpyKind
+ {
+ clMemcpyHostToDevice = 0,
+ clMemcpyDeviceToHost,
+ clMemcpyDeviceToDevice
+ };
+ ///////////////////////////OpenCL call wrappers////////////////////////////
+ void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height);
+ void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
+ size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
+ void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
+ const void *src, size_t spitch,
+ size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
+ void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
+ const void *src, size_t spitch,
+ size_t width, size_t height, int src_offset);
+ void CV_EXPORTS openCLFree(void *devPtr);
+ cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
+ void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
+ cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
+ const char **source, std::string kernelName);
+ cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
+ const char **source, std::string kernelName, const char *build_options);
+ void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
++ void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, std::vector< std::pair<size_t, const void *> > &args,
+ int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
+ void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
+ size_t globalThreads[3], size_t localThreads[3],
+ std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
+ void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
+ void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
+ int depth, const char *build_options);
+
+ cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+ const size_t size);
+
+ cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
+
+ int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
+
+ enum FLUSH_MODE
+ {
+ CLFINISH = 0,
+ CLFLUSH,
+ DISABLE
+ };
+
+ void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
+ void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
+ int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
+ // bind oclMat to OpenCL image textures
+ // note:
+ // 1. there is no memory management; the user needs to explicitly release the resource
+ // 2. for faster clamping, there is no buffer padding for the constructed texture
+ cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
+ void CV_EXPORTS releaseTexture(cl_mem& texture);
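+
+ // A minimal texture-binding sketch (an illustration only; assumes a populated oclMat named mat):
+ //
+ // cl_mem tex = bindTexture(mat); // caller owns the texture
+ // /* ... pass tex as a kernel argument ... */
+ // releaseTexture(tex); // must be released explicitly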
+
+ // returns whether the current context supports image2d_t format or not
+ bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
+
+ }//namespace ocl
+
+ }//namespace cv
+
+ #endif //__OPENCV_OCL_PRIVATE_UTIL__
/////////////////////// add subtract multiply divide /////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename T>
-void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
+void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, std::string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
{
arithmetic_run<char>(src1, src2, dst, kernelName, kernelString, (void *)NULL);
}
-static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
+static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, std::string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
template <typename WT , typename CL_WT>
-void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
+void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, std::string kernelName, const char **kernelString, int isMatSubScalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, channels, depth);
}
-static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar)
+static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, std::string kernelName, const char **kernelString, double scalar)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
};
int dst_step1 = dst.cols * dst.elemSize();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
- args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
+ args.push_back( std::make_pair( sizeof(cl_double), (void *)&scalar ));
else
{
float f_scalar = (float)scalar;
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- string kernelName;
+ std::string kernelName;
const char **kernelString = NULL;
switch( cmpOp )
{
//////////////////////////////////////////////////////////////////////////////
////////////////////////////////// flip //////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
-static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
+static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, std::string kernelName)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
openCLExecuteKernel(clCxt, &arithm_flip, kernelName, globalThreads, localThreads, args, -1, depth);
}
-static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
+static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, std::string kernelName, bool isVertical)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
//////////////////////////////////////////////////////////////////////////////
////////////////////////////// magnitude phase ///////////////////////////////
//////////////////////////////////////////////////////////////////////////////
-static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
+static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, std::string kernelName)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
}
-static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
+static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, std::string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
////////////////////////////////// cartToPolar ///////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
- string kernelName, bool angleInDegrees)
+ std::string kernelName, bool angleInDegrees)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
////////////////////////////////// polarToCart ///////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
- string kernelName)
+ std::string kernelName)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
dst.create(src.size(), src.type());
void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
oclMat emptyMat;
void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_or_with_mask" : "arithm_s_bitwise_or";
+ std::string kernelName = mask.data ? "arithm_s_bitwise_or_with_mask" : "arithm_s_bitwise_or";
if (mask.data)
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_or_scalar_mask);
else
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
oclMat emptyMat;
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_and_with_mask" : "arithm_s_bitwise_and";
+ std::string kernelName = mask.data ? "arithm_s_bitwise_and_with_mask" : "arithm_s_bitwise_and";
if (mask.data)
bitwise_scalar(src1, src2, dst, mask, kernelName, &arithm_bitwise_and_scalar_mask);
else
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
oclMat emptyMat;
void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
- string kernelName = mask.data ? "arithm_s_bitwise_xor_with_mask" : "arithm_s_bitwise_xor";
+ std::string kernelName = mask.data ? "arithm_s_bitwise_xor_with_mask" : "arithm_s_bitwise_xor";
if (mask.data)
bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar_mask);
else
//////////////////////////////////////////////////////////////////////////////
#define TILE_DIM (32)
#define BLOCK_ROWS (256/TILE_DIM)
-static void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
+static void transpose_run(const oclMat &src, oclMat &dst, std::string kernelName)
{
- if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+ if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
int src2_step = (int) src2.step;
int dst_step = (int) dst.step;
float alpha_f = alpha, beta_f = beta, gama_f = gama;
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset));
- if(src1.clCxt -> impl -> double_support != 0)
+ if(src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- args.push_back( make_pair( sizeof(cl_double), (void *)&alpha ));
- args.push_back( make_pair( sizeof(cl_double), (void *)&beta ));
- args.push_back( make_pair( sizeof(cl_double), (void *)&gama ));
+ args.push_back( std::make_pair( sizeof(cl_double), (void *)&alpha ));
+ args.push_back( std::make_pair( sizeof(cl_double), (void *)&beta ));
+ args.push_back( std::make_pair( sizeof(cl_double), (void *)&gama ));
}
else
{
};
int dst_step1 = dst.cols * dst.elemSize();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src1.clCxt -> impl -> double_support == 0)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
float pf = p;
- args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf ));
}
else
- args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
+ args.push_back( std::make_pair( sizeof(cl_double), (void *)&p ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
- if(x.clCxt -> impl -> double_support == 0 && x.type() == CV_64F)
+ if(!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
{
- cout << "Selected device do not support double" << endl;
+ std::cout << "Selected device do not support double" << std::endl;
return;
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
unsigned int count;
- openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
- string kernelName = "edgesHysteresisGlobal";
- vector< pair<size_t, const void *> > args;
+ std::string kernelName = "edgesHysteresisGlobal";
+ std::vector< std::pair<size_t, const void *> > args;
size_t localThreads[3] = {128, 1, 1};
#define DIVUP(a, b) ((a)+(b)-1)/(b)
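// ceiling division, e.g. DIVUP(100, 32) == 4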
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
- args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&count));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&map.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st1.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&st2.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counter));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&count));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
- openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
#undef DIVUP
//M*/
#include "precomp.hpp"
- #include "mcwutil.hpp"
- #include <iostream>
+
-using namespace std;
using namespace cv;
using namespace cv::ocl;
**Extend this if necessary later.
**Note that the kernel needs to be further refined.
*/
-static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
+static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
- Size &ksize, const Point anchor, bool rectKernel, bool useROI)
+ Size &ksize, const Point anchor, bool rectKernel)
{
//Normalize the result by default
//float alpha = ksize.height * ksize.width;
}
char compile_option[128];
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s %s",
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
- anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
++ sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
+ anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
rectKernel?"-D RECTKERNEL":"",
- useROI?"-D USEROI":"",
s);
- vector< pair<size_t, const void *> > args;
- args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep));
- args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset));
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_x));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_y));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
//! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
-static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
+static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
- Size &ksize, const Point anchor, bool rectKernel, bool useROI)
+ Size &ksize, const Point anchor, bool rectKernel)
{
//Normalize the result by default
//float alpha = ksize.height * ksize.width;
}
char compile_option[128];
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s %s",
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
- anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
++ sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
+ anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
rectKernel?"-D RECTKERNEL":"",
- useROI?"-D USEROI":"",
s);
- vector< pair<size_t, const void *> > args;
- args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep));
- args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep));
- args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
- args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
- args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset));
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_x));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcOffset_y));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&srcStep));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstStep));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
//openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
//openCLVerifyCall(status);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
- openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
//int argcount = 0;
- vector<pair<size_t, const void *> > args;
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&loopcount ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitnode ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
- args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
- args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
+ std::vector<std::pair<size_t, const void *> > args;
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&candidatebuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&step ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&loopcount ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&endstage ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnode ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&splitnode ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
+ args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
//M*/
#include "precomp.hpp"
- #include "mcwutil.hpp"
+
using namespace cv;
using namespace cv::ocl;
-using namespace std;
#define CELL_WIDTH 8
--- /dev/null
- cl_mem counter = clCreateBuffer(src.clCxt->impl->clContext,
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::ocl;
+
+#if !defined (HAVE_OPENCL)
+
+void cv::ocl::HoughCircles(const oclMat&, oclMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
+void cv::ocl::HoughCircles(const oclMat&, oclMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
+void cv::ocl::HoughCirclesDownload(const oclMat&, OutputArray) { throw_nogpu(); }
+
+#else /* !defined (HAVE_OPENCL) */
+
+#define MUL_UP(a, b) ((a)/(b)+1)*(b)
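+// rounds a up to the next multiple of b; note that it over-rounds exact
+// multiples (e.g. MUL_UP(64, 32) == 96), which is why call sites first
+// check `a % b == 0` before applying it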
+
+namespace cv { namespace ocl {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *imgproc_hough;
+}}
+
+
+
+//////////////////////////////////////////////////////////
+// common functions
+
+namespace
+{
+ int buildPointList_gpu(const oclMat& src, oclMat& list)
+ {
+ const int PIXELS_PER_THREAD = 16;
+
+ int totalCount = 0;
+ int err = CL_SUCCESS;
- openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
++ cl_mem counter = clCreateBuffer((cl_context)src.clCxt->oclContext(),
+ CL_MEM_COPY_HOST_PTR,
+ sizeof(int),
+ &totalCount,
+ &err);
+ openCLSafeCall(err);
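+ // `counter` starts at 0 (copied from totalCount via CL_MEM_COPY_HOST_PTR);
+ // the kernel bumps it for every edge point appended to `list`, and the
+ // final value is read back below as the number of points found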
+
+ const size_t blkSizeX = 32;
+ const size_t blkSizeY = 4;
+ size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
+
+ const int PIXELS_PER_BLOCK = blkSizeX * PIXELS_PER_THREAD;
+ const size_t glbSizeX = src.cols % (PIXELS_PER_BLOCK) == 0 ? src.cols : MUL_UP(src.cols, PIXELS_PER_BLOCK);
+ const size_t glbSizeY = src.rows % blkSizeY == 0 ? src.rows : MUL_UP(src.rows, blkSizeY);
+ size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&list.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
+
+ openCLExecuteKernel(src.clCxt, &imgproc_hough, "buildPointList", globalThreads, localThreads, args, -1, -1);
- cl_mem counter = clCreateBuffer(accum.clCxt->impl->clContext,
++ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)src.clCxt->oclCommandQueue(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
+ openCLSafeCall(clReleaseMemObject(counter));
+
+ return totalCount;
+ }
+}
+
+//////////////////////////////////////////////////////////
+// HoughCircles
+
+namespace
+{
+ void circlesAccumCenters_gpu(const oclMat& list, int count, const oclMat& dx, const oclMat& dy, oclMat& accum, int minRadius, int maxRadius, float idp)
+ {
+ const size_t blkSizeX = 256;
+ size_t localThreads[3] = { 256, 1, 1 };
+
+ const size_t glbSizeX = count % blkSizeX == 0 ? count : MUL_UP(count, blkSizeX);
+ size_t globalThreads[3] = { glbSizeX, 1, 1 };
+
+ const int width = accum.cols - 2;
+ const int height = accum.rows - 2;
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&list.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&count ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dx.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dx.step ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dy.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dy.step ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&accum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.step ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&width ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&height ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minRadius));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxRadius));
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&idp));
+
+ openCLExecuteKernel(accum.clCxt, &imgproc_hough, "circlesAccumCenters", globalThreads, localThreads, args, -1, -1);
+ }
+
+ int buildCentersList_gpu(const oclMat& accum, oclMat& centers, int threshold)
+ {
+ int totalCount = 0;
+ int err = CL_SUCCESS;
- openCLSafeCall(clEnqueueReadBuffer(accum.clCxt->impl->clCmdQueue, counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
++ cl_mem counter = clCreateBuffer((cl_context)accum.clCxt->oclContext(),
+ CL_MEM_COPY_HOST_PTR,
+ sizeof(int),
+ &totalCount,
+ &err);
+ openCLSafeCall(err);
+
+ const size_t blkSizeX = 32;
+ const size_t blkSizeY = 8;
+ size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
+
+ const size_t glbSizeX = (accum.cols - 2) % blkSizeX == 0 ? accum.cols - 2 : MUL_UP(accum.cols - 2, blkSizeX);
+ const size_t glbSizeY = (accum.rows - 2) % blkSizeY == 0 ? accum.rows - 2 : MUL_UP(accum.rows - 2, blkSizeY);
+ size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&accum.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.cols ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.rows ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.step ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&centers.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&threshold ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
+
+ openCLExecuteKernel(accum.clCxt, &imgproc_hough, "buildCentersList", globalThreads, localThreads, args, -1, -1);
+
- cl_mem counter = clCreateBuffer(circles.clCxt->impl->clContext,
++ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)accum.clCxt->oclCommandQueue(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
+ openCLSafeCall(clReleaseMemObject(counter));
+
+ return totalCount;
+ }
+
+ int circlesAccumRadius_gpu(const oclMat& centers, int centersCount,
+ const oclMat& list, int count,
+ oclMat& circles, int maxCircles,
+ float dp, int minRadius, int maxRadius, int threshold)
+ {
+ int totalCount = 0;
+ int err = CL_SUCCESS;
- const size_t blkSizeX = circles.clCxt->impl->maxWorkGroupSize;
++ cl_mem counter = clCreateBuffer((cl_context)circles.clCxt->oclContext(),
+ CL_MEM_COPY_HOST_PTR,
+ sizeof(int),
+ &totalCount,
+ &err);
+ openCLSafeCall(err);
+
- openCLSafeCall(clEnqueueReadBuffer(circles.clCxt->impl->clCmdQueue, counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
++ const size_t blkSizeX = circles.clCxt->maxWorkGroupSize();
+ size_t localThreads[3] = { blkSizeX, 1, 1 };
+
+ const size_t glbSizeX = centersCount * blkSizeX;
+ size_t globalThreads[3] = { glbSizeX, 1, 1 };
+
+ const int histSize = maxRadius - minRadius + 1;
+ size_t smemSize = (histSize + 2) * sizeof(int);
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&centers.data ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&list.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&count ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&circles.data ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxCircles ));
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&dp ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minRadius ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxRadius ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&histSize ));
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&threshold ));
+ args.push_back( std::make_pair( smemSize , (void *)NULL ));
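+ // a (size, NULL) pair requests `smemSize` bytes of __local memory for the
+ // kernel (clSetKernelArg semantics, assuming the wrapper forwards the
+ // pair unchanged)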
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
+
+ CV_Assert(circles.offset == 0);
+
+ openCLExecuteKernel(circles.clCxt, &imgproc_hough, "circlesAccumRadius", globalThreads, localThreads, args, -1, -1);
+
- openCLSafeCall(clEnqueueReadBuffer(buf.centers.clCxt->impl->clCmdQueue,
++ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)circles.clCxt->oclCommandQueue(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
+
+ openCLSafeCall(clReleaseMemObject(counter));
+
+ totalCount = std::min(totalCount, maxCircles);
+
+ return totalCount;
+ }
+
+
+} // namespace
+
+
+
+void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+ HoughCirclesBuf buf;
+ HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
+}
+
+void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method,
+ float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+ CV_Assert(src.type() == CV_8UC1);
+ CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
+ CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
+ CV_Assert(method == CV_HOUGH_GRADIENT);
+ CV_Assert(dp > 0);
+ CV_Assert(minRadius > 0 && maxRadius > minRadius);
+ CV_Assert(cannyThreshold > 0);
+ CV_Assert(votesThreshold > 0);
+ CV_Assert(maxCircles > 0);
+
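+ // pipeline: Canny edge map -> buildPointList_gpu -> circlesAccumCenters_gpu
+ // -> buildCentersList_gpu -> optional host-side minDist filtering ->
+ // circlesAccumRadius_gpu
+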
+ const float idp = 1.0f / dp;
+
+ cv::ocl::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);
+
+ ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.srcPoints);
+ const int pointsCount = buildPointList_gpu(buf.edges, buf.srcPoints);
+ if (pointsCount == 0)
+ {
+ circles.release();
+ return;
+ }
+
+ ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
+ buf.accum.setTo(Scalar::all(0));
+
+ circlesAccumCenters_gpu(buf.srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
+
+ ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.centers);
+ int centersCount = buildCentersList_gpu(buf.accum, buf.centers, votesThreshold);
+ if (centersCount == 0)
+ {
+ circles.release();
+ return;
+ }
+
+ if (minDist > 1)
+ {
+ cv::AutoBuffer<unsigned int> oldBuf_(centersCount);
+ cv::AutoBuffer<unsigned int> newBuf_(centersCount);
+ int newCount = 0;
+
+ unsigned int* oldBuf = oldBuf_;
+ unsigned int* newBuf = newBuf_;
+
- openCLSafeCall(clEnqueueWriteBuffer(buf.centers.clCxt->impl->clCmdQueue,
++ openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)buf.centers.clCxt->oclCommandQueue(),
+ (cl_mem)buf.centers.data,
+ CL_TRUE,
+ 0,
+ centersCount * sizeof(unsigned int),
+ oldBuf,
+ 0,
+ NULL,
+ NULL));
+
+
+ const int cellSize = cvRound(minDist);
+ const int gridWidth = (src.cols + cellSize - 1) / cellSize;
+ const int gridHeight = (src.rows + cellSize - 1) / cellSize;
+
+ std::vector< std::vector<unsigned int> > grid(gridWidth * gridHeight);
+
+ const float minDist2 = minDist * minDist;
+
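+ // greedy suppression: centers are binned into minDist-sized cells, and a
+ // candidate survives only if no already-accepted center in the 3x3 cell
+ // neighbourhood lies closer than minDist (squared distances avoid sqrt)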
+ for (int i = 0; i < centersCount; ++i)
+ {
+ unsigned int p = oldBuf[i];
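+ // each center is packed in one unsigned int: x in the low 16 bits,
+ // y in the high 16 bits (hence the CV_Assert on rows/cols above)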
+ const int px = p & 0xFFFF;
+ const int py = (p >> 16) & 0xFFFF;
+
+ bool good = true;
+
+ int xCell = static_cast<int>(px / cellSize);
+ int yCell = static_cast<int>(py / cellSize);
+
+ int x1 = xCell - 1;
+ int y1 = yCell - 1;
+ int x2 = xCell + 1;
+ int y2 = yCell + 1;
+
+ // boundary check
+ x1 = std::max(0, x1);
+ y1 = std::max(0, y1);
+ x2 = std::min(gridWidth - 1, x2);
+ y2 = std::min(gridHeight - 1, y2);
+
+ for (int yy = y1; yy <= y2; ++yy)
+ {
+ for (int xx = x1; xx <= x2; ++xx)
+ {
+ std::vector<unsigned int>& m = grid[yy * gridWidth + xx];
+
+ for(size_t j = 0; j < m.size(); ++j)
+ {
+ const int val = m[j];
+ const int jx = val & 0xFFFF;
+ const int jy = (val >> 16) & 0xFFFF;
+
+ float dx = (float)(px - jx);
+ float dy = (float)(py - jy);
+
+ if (dx * dx + dy * dy < minDist2)
+ {
+ good = false;
+ goto break_out;
+ }
+ }
+ }
+ }
+
+ break_out:
+
+ if(good)
+ {
+ grid[yCell * gridWidth + xCell].push_back(p);
+ newBuf[newCount++] = p;
+ }
+ }
+
++ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)buf.centers.clCxt->oclCommandQueue(),
+ (cl_mem)buf.centers.data,
+ CL_TRUE,
+ 0,
+ newCount * sizeof(unsigned int),
+ newBuf,
+ 0,
+ 0,
+ 0));
+ centersCount = newCount;
+ }
+
+ ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
+
+ const int circlesCount = circlesAccumRadius_gpu(buf.centers, centersCount,
+ buf.srcPoints, pointsCount,
+ circles, maxCircles,
+ dp, minRadius, maxRadius, votesThreshold);
+
+ if (circlesCount > 0)
+ circles.cols = circlesCount;
+ else
+ circles.release();
+}
+
+void cv::ocl::HoughCirclesDownload(const oclMat& d_circles, cv::OutputArray h_circles_)
+{
+ // FIX ME: garbage values are copied!
+ CV_Error(CV_StsNotImplemented, "HoughCirclesDownload is not implemented");
+
+ if (d_circles.empty())
+ {
+ h_circles_.release();
+ return;
+ }
+
+ CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3);
+
+ h_circles_.create(1, d_circles.cols, CV_32FC3);
+ Mat h_circles = h_circles_.getMat();
+ d_circles.download(h_circles);
+}
+
+#endif /* !defined (HAVE_OPENCL) */
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
- vector< pair<size_t, const void *> > args;
+ std::vector< std::pair<size_t, const void *> > args;
if(map1.channels() == 2)
{
- args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&map1.data));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
+ args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue));
}
else
{
}
if(map1.channels() == 1)
{
- args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&map1.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&map2.data));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.offset));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.step));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&map1.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
+ args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue));
}
else
{
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
- vector< pair<size_t, const void *> > args;
+ std::vector< std::pair<size_t, const void *> > args;
if(interpolation == INTER_NEAREST)
{
- args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
- args.push_back( make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
- args.push_back( make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.rows));
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
- args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
+ args.push_back( std::make_pair(sizeof(cl_double), (void *)&ifx_d));
+ args.push_back( std::make_pair(sizeof(cl_double), (void *)&ify_d));
}
else
{
cl_mem coeffs_cm;
Context *clCxt = src.clCxt;
- string s[3] = {"NN", "Linear", "Cubic"};
- string kernelName = "warpAffine" + s[interpolation];
+ std::string s[3] = {"NN", "Linear", "Cubic"};
+ std::string kernelName = "warpAffine" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
}
else
{
cl_mem coeffs_cm;
Context *clCxt = src.clCxt;
- string s[3] = {"NN", "Linear", "Cubic"};
- string kernelName = "warpPerspective" + s[interpolation];
+ std::string s[3] = {"NN", "Linear", "Cubic"};
+ std::string kernelName = "warpPerspective" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
+ coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
+ openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
}
else
{
cacheSize = 0;
}
- ////////////////////////Common OpenCL specific calls///////////////
- int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
- {
- rw_type = gDeviceMemRW;
- mem_type = gDeviceMemType;
- return Context::getContext()->impl->unified_memory;
- }
-
- int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
- {
- if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
- mem_type == DEVICE_MEM_UHP ||
- mem_type == DEVICE_MEM_CHP )
- return -1;
- gDeviceMemRW = rw_type;
- gDeviceMemType = mem_type;
- return 0;
- }
- struct Info::Impl
+ struct Info::Impl
{
cl_platform_id oclplatform;
std::vector<cl_device_id> devices;
cl_uint maxComputeUnits;
char extra_options[512];
int double_support;
- string binpath;
+ int unified_memory; // 1 for integrated GPUs (host-unified memory), 0 otherwise
++ std::string binpath;
+ int refcounter;
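+ // intrusive reference count: copy() bumps it and returns `this`;
+ // release() decrements it and deletes the Impl at zero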
+
Impl()
{
+ refcounter = 1;
+ oclplatform = 0;
+ oclcontext = 0;
+ clCmdQueue = 0;
+ devnum = -1;
+ maxComputeUnits = 0;
+ maxWorkGroupSize = 0;
memset(extra_options, 0, 512);
- }
+ double_support = 0;
+ unified_memory = 0;
+ }
+
+ void setDevice(void *ctx, void *q, int devnum);
+
+ void release()
+ {
+ if(1 == CV_XADD(&refcounter, -1))
+ {
+ releaseResources();
+ delete this;
+ }
+ }
+
+ Impl* copy()
+ {
+ CV_XADD(&refcounter, 1);
+ return this;
+ }
+
+ private:
+ Impl(const Impl&);
+ Impl& operator=(const Impl&);
+ void releaseResources();
};
+ void Info::Impl::releaseResources()
+ {
+ devnum = -1;
+
+ if(clCmdQueue)
+ {
+ openCLSafeCall(clReleaseCommandQueue(clCmdQueue));
+ clCmdQueue = 0;
+ }
+
+ if(oclcontext)
+ {
+ openCLSafeCall(clReleaseContext(oclcontext));
+ oclcontext = 0;
+ }
+ }
+
+ void Info::Impl::setDevice(void *ctx, void *q, int dnum)
+ {
+ if((ctx && q) || devnum != dnum)
+ releaseResources();
+
+ CV_Assert(dnum >= 0 && dnum < (int)devices.size());
+ devnum = dnum;
+ if(ctx && q)
+ {
+ oclcontext = (cl_context)ctx;
+ clCmdQueue = (cl_command_queue)q;
+ clRetainContext(oclcontext);
+ clRetainCommandQueue(clCmdQueue);
+ }
+ else
+ {
+ cl_int status = 0;
+ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 };
+ oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status);
+ openCLVerifyCall(status);
+ clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status);
+ openCLVerifyCall(status);
+ }
+
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0));
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0));
+ maxWorkItemSizes.resize(maxDimensions);
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0));
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), (void *)&maxComputeUnits, 0));
+
+ cl_bool unfymem = false;
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0));
+ unified_memory = unfymem ? 1 : 0;
+
+ //initialize extra options for compilation. Currently only fp64 is included.
+ //Assume 4KB is enough to store all possible extensions.
+ const int EXT_LEN = 4096 + 1;
+ char extends_set[EXT_LEN];
+ size_t extends_size;
+ openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
+ extends_set[EXT_LEN - 1] = 0;
+ size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64");
+
+ if(fp64_khr != std::string::npos)
+ {
+ sprintf(extra_options, "-D DOUBLE_SUPPORT");
+ double_support = 1;
+ }
+ else
+ {
+ memset(extra_options, 0, 512);
+ double_support = 0;
+ }
+ }
+
+ ////////////////////////Common OpenCL specific calls///////////////
+ int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type)
+ {
+ rw_type = gDeviceMemRW;
+ mem_type = gDeviceMemType;
+ return Context::getContext()->impl->unified_memory;
+ }
+
+ int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
+ {
+ if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) ||
+ mem_type == DEVICE_MEM_UHP ||
+ mem_type == DEVICE_MEM_CHP )
+ return -1;
+ gDeviceMemRW = rw_type;
+ gDeviceMemType = mem_type;
+ return 0;
+ }
+
inline int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
char *buildLog = NULL;
size_t buildLogSize = 0;
logStatus = clGetProgramBuildInfo(program,
- clCxt->impl->devices, CL_PROGRAM_BUILD_LOG, buildLogSize,
+ clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize,
buildLog, &buildLogSize);
if(logStatus != CL_SUCCESS)
- cout << "Failed to build the program and get the build info." << endl;
+ std::cout << "Failed to build the program and get the build info." << std::endl;
buildLog = new char[buildLogSize];
CV_DbgAssert(!!buildLog);
memset(buildLog, 0, buildLogSize);
- openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices,
+ openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum],
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
- cout << "\n\t\t\tBUILD LOG\n";
- cout << buildLog << endl;
+ std::cout << "\n\t\t\tBUILD LOG\n";
+ std::cout << buildLog << std::endl;
delete [] buildLog;
}
openCLVerifyCall(status);
}
/////////////////////////////OpenCL initialization/////////////////
- auto_ptr<Context> Context::clCxt;
+ std::auto_ptr<Context> Context::clCxt;
int Context::val = 0;
- Mutex cs;
+ static Mutex cs;
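+ // getContext() below uses double-checked locking: a volatile read of
+ // `val` skips the mutex once initialization is done, and the test is
+ // repeated under the lock before the default context is created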
- Context* Context::getContext()
+ Context *Context::getContext()
{
- if(val == 0)
+ if(*((volatile int*)&val) != 1)
{
AutoLock al(cs);
- if( NULL == clCxt.get())
+ if(*((volatile int*)&val) != 1)
+ {
+ if( 0 == clCxt.get())
- clCxt.reset(new Context);
+ clCxt.reset(new Context);
- val = 1;
- return clCxt.get();
+ std::vector<Info> oclinfo;
+ CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
+ oclinfo[0].impl->setDevice(0, 0, 0);
+ clCxt.get()->impl = oclinfo[0].impl->copy();
+
+ *((volatile int*)&val) = 1;
+ }
}
- else
- {
- return clCxt.get();
- }
+ return clCxt.get();
+ }
- }
+
void Context::setContext(Info &oclinfo)
{
- Context *clcxt = getContext();
- clcxt->impl->clContext = oclinfo.impl->oclcontext;
- clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue;
- clcxt->impl->devices = oclinfo.impl->devices[oclinfo.impl->devnum];
- clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
- clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions;
- clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize;
- for(size_t i=0; i<clcxt->impl->maxDimensions && i<4; i++)
- clcxt->impl->maxWorkItemSizes[i] = oclinfo.impl->maxWorkItemSizes[i];
- clcxt->impl->maxComputeUnits = oclinfo.impl->maxComputeUnits;
- clcxt->impl->double_support = oclinfo.impl->double_support;
- //extra options to recognize compiler options
- memcpy(clcxt->impl->extra_options, oclinfo.impl->extra_options, 512);
- cl_bool unfymem = false;
- openCLSafeCall(clGetDeviceInfo(clcxt->impl->devices, CL_DEVICE_HOST_UNIFIED_MEMORY,
- sizeof(cl_bool), (void *)&unfymem, NULL));
- if(unfymem)
- clcxt->impl->unified_memory = 1;
+ AutoLock guard(cs);
+ if(*((volatile int*)&val) != 1)
+ {
+ if( 0 == clCxt.get())
+ clCxt.reset(new Context);
+
+ clCxt.get()->impl = oclinfo.impl->copy();
+
+ *((volatile int*)&val) = 1;
+ }
+ else
+ {
+ clCxt.get()->impl->release();
+ clCxt.get()->impl = oclinfo.impl->copy();
+ }
}
+
Context::Context()
{
- impl = new Impl;
- //Information of the OpenCL context
- impl->clContext = NULL;
- impl->clCmdQueue = NULL;
- impl->devices = NULL;
- impl->maxDimensions = 0;
- impl->maxWorkGroupSize = 0;
- for(int i=0; i<4; i++)
- impl->maxWorkItemSizes[i] = 0;
- impl->maxComputeUnits = 0;
- impl->double_support = 0;
- //extra options to recognize vendor specific fp64 extensions
- memset(impl->extra_options, 0, 512);
- impl->unified_memory = 0;
+ impl = 0;
programCache = ProgramCache::getProgramCache();
}
Context::~Context()
{
- delete impl;
+ release();
+ }
+
+ void Context::release()
+ {
+ if (impl)
+ impl->release();
programCache->releaseProgram();
}
+
+ bool Context::supportsFeature(int ftype)
+ {
+ switch(ftype)
+ {
+ case CL_DOUBLE:
+ return impl->double_support == 1;
+ case CL_UNIFIED_MEM:
+ return impl->unified_memory == 1;
+ default:
+ return false;
+ }
+ }
+
+ size_t Context::computeUnits()
+ {
+ return impl->maxComputeUnits;
+ }
+
++ size_t Context::maxWorkGroupSize()
++ {
++ return impl->maxWorkGroupSize;
++ }
++
+ void* Context::oclContext()
+ {
+ return impl->oclcontext;
+ }
+
+ void* Context::oclCommandQueue()
+ {
+ return impl->clCmdQueue;
+ }
+
Info::Info()
{
impl = new Impl;
//
//M*/
- #include <iomanip>
#include "precomp.hpp"
- #include "mcwutil.hpp"
-using namespace std;
using namespace cv;
using namespace cv::ocl;
build_options, finish_mode);
}
-- cl_mem bindTexture(const oclMat &mat)
++ cl_mem bindTexture(const oclMat &mat)
{
cl_mem texture;
cl_image_format format;
int llength = std::min(lpt,128);
size_t localThreads[3] = { llength, 1, 1};
size_t globalThreads[3] = { lpt, 1, 1};
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&contour->total ));
+ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a00.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a10.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a01.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a20.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a11.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a02.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a30.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a21.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a12.data ));
- args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a03.data ));
- openCLExecuteKernel(dst_a00.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
-
- cv::Mat dst(dst_a00);
- cv::Scalar s = cv::sum(dst);
- a00 = s[0];
- dst = dst_a10;
- s = cv::sum(dst);
- a10 = s[0];//dstsum[1];
- dst = dst_a01;
- s = cv::sum(dst);
- a01 = s[0];//dstsum[2];
- dst = dst_a20;
- s = cv::sum(dst);
- a20 = s[0];//dstsum[3];
- dst = dst_a11;
- s = cv::sum(dst);
- a11 = s[0];//dstsum[4];
- dst = dst_a02;
- s = cv::sum(dst);
- a02 = s[0];//dstsum[5];
- dst = dst_a30;
- s = cv::sum(dst);
- a30 = s[0];//dstsum[6];
- dst = dst_a21;
- s = cv::sum(dst);
- a21 = s[0];//dstsum[7];
- dst = dst_a12;
- s = cv::sum(dst);
- a12 = s[0];//dstsum[8];
- dst = dst_a03;
- s = cv::sum(dst);
- a03 = s[0];//dstsum[9];
++ args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
+ cl_int dst_step = (cl_int)dst_a.step;
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step ));
++ args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
+
+ openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
+
+ cv::Mat dst(dst_a);
+ a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
+ if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
+ {
+ for (int i = 0; i < contour->total; ++i)
+ {
+ a00 += dst.at<cl_long>(0, i);
+ a10 += dst.at<cl_long>(1, i);
+ a01 += dst.at<cl_long>(2, i);
+ a20 += dst.at<cl_long>(3, i);
+ a11 += dst.at<cl_long>(4, i);
+ a02 += dst.at<cl_long>(5, i);
+ a30 += dst.at<cl_long>(6, i);
+ a21 += dst.at<cl_long>(7, i);
+ a12 += dst.at<cl_long>(8, i);
+ a03 += dst.at<cl_long>(9, i);
+ }
+ }
+ else
+ {
+ a00 = cv::sum(dst.row(0))[0];
+ a10 = cv::sum(dst.row(1))[0];
+ a01 = cv::sum(dst.row(2))[0];
+ a20 = cv::sum(dst.row(3))[0];
+ a11 = cv::sum(dst.row(4))[0];
+ a02 = cv::sum(dst.row(5))[0];
+ a30 = cv::sum(dst.row(6))[0];
+ a21 = cv::sum(dst.row(7))[0];
+ a12 = cv::sum(dst.row(8))[0];
+ a03 = cv::sum(dst.row(9))[0];
+ }
double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
if( fabs(a00) > FLT_EPSILON )
--- /dev/null
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+
+////////////////////////////////////////////////////////////////////////
+// buildPointList
+
+#define PIXELS_PER_THREAD 16
+
+// TODO: add offset to support ROI
+__kernel void buildPointList(__global const uchar* src,
+ int cols,
+ int rows,
+ int step,
+ __global unsigned int* list,
+ __global int* counter)
+{
+ __local unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
+ __local int s_qsize[4];
+ __local int s_globStart[4];
+
+ const int x = get_group_id(0) * get_local_size(0) * PIXELS_PER_THREAD + get_local_id(0);
+ const int y = get_global_id(1);
+
+ if (get_local_id(0) == 0)
+ s_qsize[get_local_id(1)] = 0;
+ barrier(CLK_LOCAL_MEM_FENCE);
-
++
+ if (y < rows)
+ {
+ // fill the queue
+ __global const uchar* srcRow = &src[y * step];
+ for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < cols; ++i, xx += get_local_size(0))
+ {
+ if (srcRow[xx])
+ {
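+ // pack the point into one 32-bit word: row (y) in the high 16 bits,
+ // column (xx) in the low 16 bits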
+ const unsigned int val = (y << 16) | xx;
+ const int qidx = atomic_add(&s_qsize[get_local_id(1)], 1);
+ s_queues[get_local_id(1)][qidx] = val;
+ }
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ // let one work-item reserve the space required in the global list
+ if (get_local_id(0) == 0 && get_local_id(1) == 0)
+ {
+ // find how many items are stored in each list
+ int totalSize = 0;
+ for (int i = 0; i < get_local_size(1); ++i)
+ {
+ s_globStart[i] = totalSize;
+ totalSize += s_qsize[i];
+ }
+
+ // calculate the offset in the global list
+ const int globalOffset = atomic_add(counter, totalSize);
+ for (int i = 0; i < get_local_size(1); ++i)
+ s_globStart[i] += globalOffset;
+ }
+
+ barrier(CLK_GLOBAL_MEM_FENCE);
-
++
+ // copy local queues to global queue
+ const int qsize = s_qsize[get_local_id(1)];
+ int gidx = s_globStart[get_local_id(1)] + get_local_id(0);
+ for(int i = get_local_id(0); i < qsize; i += get_local_size(0), gidx += get_local_size(0))
+ list[gidx] = s_queues[get_local_id(1)][i];
+}
+
+////////////////////////////////////////////////////////////////////////
+// circlesAccumCenters
+
+// TODO: add offset to support ROI
+__kernel void circlesAccumCenters(__global const unsigned int* list,
+ const int count,
+ __global const int* dx,
+ const int dxStep,
+ __global const int* dy,
+ const int dyStep,
+ __global int* accum,
+ const int accumStep,
+ const int width,
+ const int height,
+ const int minRadius,
+ const int maxRadius,
+ const float idp)
+{
+ const int dxStepInPixel = dxStep / sizeof(int);
+ const int dyStepInPixel = dyStep / sizeof(int);
+ const int accumStepInPixel = accumStep / sizeof(int);
-
++
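+ // 10-bit fixed point: coordinates are pre-scaled by ONE so the radius walk
+ // below needs only integer adds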
+ const int SHIFT = 10;
+ const int ONE = 1 << SHIFT;
+
+ // const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+ const int wid = get_global_id(0);
+
+ if (wid >= count)
+ return;
+
+ const unsigned int val = list[wid];
+
+ const int x = (val & 0xFFFF);
+ const int y = (val >> 16) & 0xFFFF;
+
+ const int vx = dx[mad24(y, dxStepInPixel, x)];
+ const int vy = dy[mad24(y, dyStepInPixel, x)];
+
+ if (vx == 0 && vy == 0)
+ return;
+
+ const float mag = sqrt(convert_float(vx * vx + vy * vy));
+
+ const int x0 = convert_int_rte((x * idp) * ONE);
+ const int y0 = convert_int_rte((y * idp) * ONE);
+
+ int sx = convert_int_rte((vx * idp) * ONE / mag);
+ int sy = convert_int_rte((vy * idp) * ONE / mag);
+
+ // Step from minRadius to maxRadius in both directions of the gradient
+ for (int k1 = 0; k1 < 2; ++k1)
+ {
+ int x1 = x0 + minRadius * sx;
+ int y1 = y0 + minRadius * sy;
+
+ for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
+ {
+ const int x2 = x1 >> SHIFT;
+ const int y2 = y1 >> SHIFT;
+
+ if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
+ break;
+
+ atomic_add(&accum[mad24(y2+1, accumStepInPixel, x2+1)], 1);
+ }
+
+ sx = -sx;
+ sy = -sy;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////
+// buildCentersList
+
+// TODO: add offset to support ROI
+__kernel void buildCentersList(__global const int* accum,
+ const int accumCols,
+ const int accumRows,
+ const int accumStep,
+ __global unsigned int* centers,
+ const int threshold,
+ __global int* counter)
+{
+ const int accumStepInPixel = accumStep / sizeof(int);
-
++
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+
+ if (x < accumCols - 2 && y < accumRows - 2)
+ {
+ const int top = accum[mad24(y, accumStepInPixel, x + 1)];
+
+ const int left = accum[mad24(y + 1, accumStepInPixel, x)];
+ const int cur = accum[mad24(y + 1, accumStepInPixel, x + 1)];
+ const int right = accum[mad24(y + 1, accumStepInPixel, x + 2)];
-
++
+ const int bottom = accum[mad24(y + 2, accumStepInPixel, x + 1)];
+
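+ // keep the candidate only if it is a local maximum of the accumulator
+ // (strict vs. top/left, non-strict vs. bottom/right to break ties)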
+ if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
+ {
+ const unsigned int val = (y << 16) | x;
+ const int idx = atomic_add(counter, 1);
+ centers[idx] = val;
+ }
+ }
+}
+
+
+////////////////////////////////////////////////////////////////////////
+// circlesAccumRadius
+
+// TODO: add offset to support ROI
+__kernel void circlesAccumRadius(__global const unsigned int* centers,
+ __global const unsigned int* list, const int count,
+ __global float4* circles, const int maxCircles,
+ const float dp,
+ const int minRadius, const int maxRadius,
+ const int histSize,
+ const int threshold,
+ __local int* smem,
+ __global int* counter)
+{
+ for (int i = get_local_id(0); i < histSize + 2; i += get_local_size(0))
+ smem[i] = 0;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ unsigned int val = centers[get_group_id(0)];
+
+ float cx = convert_float(val & 0xFFFF);
+ float cy = convert_float((val >> 16) & 0xFFFF);
+
+ cx = (cx + 0.5f) * dp;
+ cy = (cy + 0.5f) * dp;
+
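+ // each work-item strides over the edge-point list and votes into the
+ // shared radius histogram for this candidate center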
+ for (int i = get_local_id(0); i < count; i += get_local_size(0))
+ {
+ val = list[i];
+
+ const int x = (val & 0xFFFF);
+ const int y = (val >> 16) & 0xFFFF;
+
+ const float rad = sqrt((cx - x) * (cx - x) + (cy - y) * (cy - y));
+ if (rad >= minRadius && rad <= maxRadius)
+ {
+ const int r = convert_int_rte(rad - minRadius);
+
+ atomic_add(&smem[r + 1], 1);
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i = get_local_id(0); i < histSize; i += get_local_size(0))
+ {
+ const int curVotes = smem[i + 1];
+
+ if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
+ {
+ const int ind = atomic_add(counter, 1);
+ if (ind < maxCircles)
+ {
+ circles[ind] = (float4)(cx, cy, convert_float(i + minRadius), 0.0f);
+ }
+ }
+ }
+}
#include <exception>
#include <stdio.h>
- #include "opencv2/ocl.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
++#include "opencv2/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/core_c.h"
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/ocl/ocl.hpp"
++#include "opencv2/objdetect.hpp"
++#include "opencv2/ocl.hpp"
+#include "opencv2/core/utility.hpp"
#include "opencv2/core/internal.hpp"
-//#include "opencv2/highgui/highgui.hpp"
+//#include "opencv2/highgui.hpp"
#define __ATI__
#include "precomp.hpp"
- #include "mcwutil.hpp"
-using namespace std;
using namespace cv;
using namespace cv::ocl;
// }
// }
-static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
+static void arithmetic_run(const oclMat &src1, oclMat &dst, std::string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+ if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
--- /dev/null
-using namespace std;
-
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // @Authors
+ // Jia Haipeng, jiahaipeng95@gmail.com
+ // Xiaopeng Fu, xiaopeng@multicorewareinc.com
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+ #include <vector>
+
+ using namespace cv;
+ using namespace cv::ocl;
- string kernelName = "prefilter_xsobel";
+
+ namespace cv
+ {
+ namespace ocl
+ {
+
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *stereobm;
+
+ }
+ }
+ namespace cv
+ {
+ namespace ocl
+ {
+ namespace stereoBM
+ {
+ /////////////////////////////////////////////////////////////////////////
+ //////////////////////////prefilter_xsobel////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////
+ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
+ {
+ Context *clCxt = input.clCxt;
+
- string kernelName = "stereoKernel";
++ std::string kernelName = "prefilter_xsobel";
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+ size_t blockSize = 1;
+ size_t globalThreads[3] = { input.cols, input.rows, 1 };
+ size_t localThreads[3] = { blockSize, blockSize, 1 };
+
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&input.data));
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&output.data));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&input.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));
+
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
+ openCLSafeCall(clReleaseKernel(kernel));
+
+ }
+ //////////////////////////////////////////////////////////////////////////
+ //////////////////////////////common////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////
+ #define N_DISPARITIES 8
+ #define ROWSperTHREAD 21
+ #define BLOCK_W 128
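+ // integer division with rounding up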
+ static inline int divUp(int total, int grain)
+ {
+ return (total + grain - 1) / grain;
+ }
+ ////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////stereoBM_GPU////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
+ int maxdisp, int winSize, oclMat &minSSD_buf)
+ {
+ int winsz2 = winSize >> 1;
+
+ //if(winsz2 == 0 || winsz2 >= calles_num)
+ //cv::ocl:error("Unsupported window size", __FILE__, __LINE__, __FUNCTION__);
+
+ Context *clCxt = left.clCxt;
+
- string kernelName = "textureness_kernel";
++ std::string kernelName = "stereoKernel";
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+ disp.setTo(Scalar_<unsigned char>::all(0));
+ minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
+
+ size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize();
+ size_t local_mem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
+ sizeof(cl_uint);
+ //size_t blockSize = 1;
+ size_t localThreads[] = { BLOCK_W, 1, 1 };
+ size_t globalThreads[] = { divUp(left.cols - maxdisp - 2 * winsz2, BLOCK_W) * BLOCK_W,
+ divUp(left.rows - 2 * winsz2, ROWSperTHREAD),
+ 1
+ };
+
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&right.data));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&minSSD_buf.data));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&minssd_step));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&disp.data));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&disp.step));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&left.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&left.step));
+ openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&maxdisp));
+ openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
+ openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
+
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ ////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////////postfilter_textureness///////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ static void postfilter_textureness(oclMat &left, int winSize,
+ float avergeTexThreshold, oclMat &disparity)
+ {
+ Context *clCxt = left.clCxt;
+
-
++ std::string kernelName = "textureness_kernel";
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
+
+ size_t blockSize = 1;
+ size_t localThreads[] = { BLOCK_W, blockSize, 1 };
+ size_t globalThreads[] = { divUp(left.cols, BLOCK_W) * BLOCK_W,
+ divUp(left.rows, 2 * ROWSperTHREAD),
+ 1
+ };
+
+ size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
+
+ openCLVerifyKernel(clCxt, kernel, localThreads);
+ openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disparity.data));
+ openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&disparity.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&disparity.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&disparity.step));
+ openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&left.data));
+ openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows));
+ openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols));
+ openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
+ openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
+ openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
+ openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
+ globalThreads, localThreads, 0, NULL, NULL));
+
+ clFinish((cl_command_queue)clCxt->oclCommandQueue());
+ openCLSafeCall(clReleaseKernel(kernel));
+ }
+ //////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////operator/////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////
+ static void operator_(oclMat &minSSD, oclMat &leBuf, oclMat &riBuf, int preset, int ndisp,
+ int winSize, float avergeTexThreshold, const oclMat &left,
+ const oclMat &right, oclMat &disparity)
+ {
+ CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
+ CV_DbgAssert(left.type() == CV_8UC1);
+ CV_DbgAssert(right.type() == CV_8UC1);
+
+ disparity.create(left.size(), CV_8UC1);
+ minSSD.create(left.size(), CV_32SC1);
+
+ oclMat le_for_bm = left;
+ oclMat ri_for_bm = right;
+
+ if (preset == cv::ocl::StereoBM_OCL::PREFILTER_XSOBEL)
+ {
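+ // gather a temporal window of up to 2*temporalAreaRadius_+1 frames around
+ // idx (clamped at the sequence ends) and super-resolve the frame at baseIdx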
+ leBuf.create( left.size(), left.type());
+ riBuf.create(right.size(), right.type());
+
+ prefilter_xsobel( left, leBuf, 31);
+ prefilter_xsobel(right, riBuf, 31);
+
+ le_for_bm = leBuf;
+ ri_for_bm = riBuf;
+ }
+
+ stereo_bm(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);
+
+ if (avergeTexThreshold)
+ {
+ postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
+ }
+ }
+ }
+ }
+ }
+ const float defaultAvgTexThreshold = 3;
+
+ cv::ocl::StereoBM_OCL::StereoBM_OCL()
+ : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ),
+ avergeTexThreshold(defaultAvgTexThreshold) {}
+
+ cv::ocl::StereoBM_OCL::StereoBM_OCL(int preset_, int ndisparities_, int winSize_)
+ : preset(preset_), ndisp(ndisparities_), winSize(winSize_),
+ avergeTexThreshold(defaultAvgTexThreshold)
+ {
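+ // disparities are written to an 8-bit image, so at most 1 << 8 = 256
+ // disparity levels are representable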
+ const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
+ CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
+ CV_Assert(ndisp % 8 == 0);
+ CV_Assert(winSize % 2 == 1);
+ }
+
+ bool cv::ocl::StereoBM_OCL::checkIfGpuCallReasonable()
+ {
+ return true;
+ }
+
+ void cv::ocl::StereoBM_OCL::operator() ( const oclMat &left, const oclMat &right,
+ oclMat &disparity)
+ {
+ cv::ocl::stereoBM::operator_(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity);
+ }
#include <string>
#include <cstdarg>
#include "cvconfig.h"
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-//#include "opencv2/calib3d/calib3d.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/video/video.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/ocl.hpp"
+//#include "opencv2/calib3d.hpp"
- //#include "opencv2/nonfree.hpp"
#include "utility.hpp"
#include "interpolation.hpp"
//
//M*/
- #ifndef _OPENCV_MCWUTIL_
- #define _OPENCV_MCWUTIL_
-
#include "precomp.hpp"
+ #include <iomanip>
+
+ #ifdef HAVE_OPENCL
- namespace cv
+ using namespace cv;
+
+ extern std::string workdir;
+ PARAM_TEST_CASE(StereoMatchBM, int, int)
{
- namespace ocl
+ int n_disp;
+ int winSize;
+
+ virtual void SetUp()
{
- enum FLUSH_MODE
+ n_disp = GET_PARAM(0);
- winSize = GET_PARAM(1);
++ winSize = GET_PARAM(1);
+ }
+ };
+
+ TEST_P(StereoMatchBM, Accuracy)
-{
+ {
- CLFINISH = 0,
- CLFLUSH,
- DISABLE
- };
- void openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
- void openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
- size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
- int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
- // bind oclMat to OpenCL image textures
- // note:
- // 1. there is no memory management. User need to explicitly release the resource
- // 2. for faster clamping, there is no buffer padding for the constructed texture
- cl_mem bindTexture(const oclMat &mat);
- void releaseTexture(cl_mem& texture);
- // returns whether the current context supports image2d_t format or not
- bool support_image2d(Context *clCxt = Context::getContext());
+ Mat left_image = readImage(workdir + "../ocl/aloe-L.png", IMREAD_GRAYSCALE);
+ Mat right_image = readImage(workdir + "../ocl/aloe-R.png", IMREAD_GRAYSCALE);
+ Mat disp_gold = readImage(workdir + "../ocl/aloe-disp.png", IMREAD_GRAYSCALE);
- ocl::oclMat d_left, d_right;
- ocl::oclMat d_disp(left_image.size(), CV_8U);
- Mat disp;
++ ocl::oclMat d_left, d_right;
++ ocl::oclMat d_disp(left_image.size(), CV_8U);
++ Mat disp;
+
+ ASSERT_FALSE(left_image.empty());
+ ASSERT_FALSE(right_image.empty());
+ ASSERT_FALSE(disp_gold.empty());
- d_left.upload(left_image);
- d_right.upload(right_image);
++ d_left.upload(left_image);
++ d_right.upload(right_image);
+
+ ocl::StereoBM_OCL bm(0, n_disp, winSize);
+
+ bm(d_left, d_right, d_disp);
- d_disp.download(disp);
++ d_disp.download(disp);
- }//namespace ocl
+ EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3);
+ }
- }//namespace cv
+ INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128),
- testing::Values(19)));
++ testing::Values(19)));
- #endif //_OPENCV_MCWUTIL_
+ #endif // HAVE_OPENCL
#ifndef __OPENCV_STITCHING_MATCHERS_HPP__
#define __OPENCV_STITCHING_MATCHERS_HPP__
-#include "opencv2/core/core.hpp"
-#include "opencv2/features2d/features2d.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/features2d.hpp"
#include "opencv2/opencv_modules.hpp"
- #ifdef HAVE_OPENCV_GPU
- #include "opencv2/gpu.hpp"
-
+ #if defined(HAVE_OPENCV_NONFREE) && defined(HAVE_OPENCV_GPU)
- #include "opencv2/nonfree/gpu.hpp"
++#include "opencv2/nonfree/gpu.hpp"
#endif
namespace cv {
#include "opencv2/stitching/detail/seam_finders.hpp"
#include "opencv2/stitching/detail/util.hpp"
#include "opencv2/stitching/detail/warpers.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/features2d/features2d.hpp"
-#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/calib3d.hpp"
#ifdef HAVE_OPENCV_GPU
- # include "opencv2/gpu.hpp"
- #include "opencv2/gpu/gpu.hpp"
-
- #ifdef HAVE_OPENCV_NONFREE
- #include "opencv2/nonfree/gpu.hpp"
- #endif
++# include "opencv2/gpu.hpp"
++# ifdef HAVE_OPENCV_NONFREE
++# include "opencv2/nonfree/gpu.hpp"
++# endif
#endif
#include "../../imgproc/src/gcgraph.hpp"
--- /dev/null
-#include "opencv2/core/core.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_SUPERRES_HPP__
+ #define __OPENCV_SUPERRES_HPP__
+
++#include "opencv2/core.hpp"
+
+ namespace cv
+ {
+ namespace superres
+ {
+ CV_EXPORTS bool initModule_superres();
+
+ class CV_EXPORTS FrameSource
+ {
+ public:
+ virtual ~FrameSource();
+
+ virtual void nextFrame(OutputArray frame) = 0;
+ virtual void reset() = 0;
+ };
+
+ CV_EXPORTS Ptr<FrameSource> createFrameSource_Empty();
+
+ CV_EXPORTS Ptr<FrameSource> createFrameSource_Video(const std::string& fileName);
+ CV_EXPORTS Ptr<FrameSource> createFrameSource_Video_GPU(const std::string& fileName);
+
+ CV_EXPORTS Ptr<FrameSource> createFrameSource_Camera(int deviceId = 0);
+
+ class CV_EXPORTS SuperResolution : public cv::Algorithm, public FrameSource
+ {
+ public:
+ void setInput(const Ptr<FrameSource>& frameSource);
+
+ void nextFrame(OutputArray frame);
+ void reset();
+
+ virtual void collectGarbage();
+
+ protected:
+ SuperResolution();
+
+ virtual void initImpl(Ptr<FrameSource>& frameSource) = 0;
+ virtual void processImpl(Ptr<FrameSource>& frameSource, OutputArray output) = 0;
+
+ private:
+ Ptr<FrameSource> frameSource_;
+ bool firstCall_;
+ };
+
+ // S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.
+ // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
+ CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1();
+ CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1_GPU();
+ }
+ }
+
+ #endif // __OPENCV_SUPERRES_HPP__
--- /dev/null
-#include "opencv2/core/core.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
+ #define __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
+
++#include "opencv2/core.hpp"
+
+ namespace cv
+ {
+ namespace superres
+ {
+ class CV_EXPORTS DenseOpticalFlowExt : public cv::Algorithm
+ {
+ public:
+ virtual void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2 = noArray()) = 0;
+ virtual void collectGarbage() = 0;
+ };
+
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Farneback();
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Farneback_GPU();
+
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Simple();
+
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1();
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1_GPU();
+
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Brox_GPU();
+
+ CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_PyrLK_GPU();
+ }
+ }
+
+ #endif // __OPENCV_SUPERRES_OPTICAL_FLOW_HPP__
--- /dev/null
-#include "opencv2/core/core.hpp"
+ #ifdef __GNUC__
+ # pragma GCC diagnostic ignored "-Wmissing-declarations"
+ # if defined __clang__ || defined __APPLE__
+ # pragma GCC diagnostic ignored "-Wmissing-prototypes"
+ # pragma GCC diagnostic ignored "-Wextra"
+ # endif
+ #endif
+
+ #ifndef __OPENCV_PERF_PRECOMP_HPP__
+ #define __OPENCV_PERF_PRECOMP_HPP__
+
+ #ifdef HAVE_CVCONFIG_H
+ #include "cvconfig.h"
+ #endif
+
-#include "opencv2/superres/superres.hpp"
++#include "opencv2/core.hpp"
+ #include "opencv2/core/gpumat.hpp"
+ #include "opencv2/ts/ts_perf.hpp"
+ #include "opencv2/ts/gpu_perf.hpp"
++#include "opencv2/superres.hpp"
+ #include "opencv2/superres/optical_flow.hpp"
+
+ #ifdef GTEST_CREATE_SHARED_LIBRARY
+ #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+ #endif
+
+ #endif
--- /dev/null
-using namespace std;
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ // S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.
+ // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
+
+ #include "precomp.hpp"
+
- void calcRelativeMotions(const vector<Mat>& forwardMotions, const vector<Mat>& backwardMotions,
- vector<Mat>& relForwardMotions, vector<Mat>& relBackwardMotions,
+ using namespace cv;
+ using namespace cv::superres;
+ using namespace cv::superres::detail;
+
+ namespace
+ {
- void upscaleMotions(const vector<Mat>& lowResMotions, vector<Mat>& highResMotions, int scale)
++ void calcRelativeMotions(const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions,
++ std::vector<Mat>& relForwardMotions, std::vector<Mat>& relBackwardMotions,
+ int baseIdx, Size size)
+ {
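+ // convert frame-to-frame motions into motions relative to the reference
+ // frame baseIdx (zero at baseIdx itself), accumulating outward in both directions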
+ const int count = static_cast<int>(forwardMotions.size());
+
+ relForwardMotions.resize(count);
+ relForwardMotions[baseIdx].create(size, CV_32FC2);
+ relForwardMotions[baseIdx].setTo(Scalar::all(0));
+
+ relBackwardMotions.resize(count);
+ relBackwardMotions[baseIdx].create(size, CV_32FC2);
+ relBackwardMotions[baseIdx].setTo(Scalar::all(0));
+
+ for (int i = baseIdx - 1; i >= 0; --i)
+ {
+ add(relForwardMotions[i + 1], forwardMotions[i], relForwardMotions[i]);
+
+ add(relBackwardMotions[i + 1], backwardMotions[i + 1], relBackwardMotions[i]);
+ }
+
+ for (int i = baseIdx + 1; i < count; ++i)
+ {
+ add(relForwardMotions[i - 1], backwardMotions[i], relForwardMotions[i]);
+
+ add(relBackwardMotions[i - 1], forwardMotions[i - 1], relBackwardMotions[i]);
+ }
+ }
+
- void calcBtvWeights(int btvKernelSize, double alpha, vector<float>& btvWeights)
++ void upscaleMotions(const std::vector<Mat>& lowResMotions, std::vector<Mat>& highResMotions, int scale)
+ {
+ highResMotions.resize(lowResMotions.size());
+
+ for (size_t i = 0; i < lowResMotions.size(); ++i)
+ {
+ resize(lowResMotions[i], highResMotions[i], Size(), scale, scale, INTER_CUBIC);
+ multiply(highResMotions[i], Scalar::all(scale), highResMotions[i]);
+ }
+ }
+
+ void buildMotionMaps(const Mat& forwardMotion, const Mat& backwardMotion, Mat& forwardMap, Mat& backwardMap)
+ {
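+ // build absolute coordinate maps for remap(): the forward map is
+ // base + backward motion and vice versa, since remap() pulls pixels from the source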
+ forwardMap.create(forwardMotion.size(), CV_32FC2);
+ backwardMap.create(forwardMotion.size(), CV_32FC2);
+
+ for (int y = 0; y < forwardMotion.rows; ++y)
+ {
+ const Point2f* forwardMotionRow = forwardMotion.ptr<Point2f>(y);
+ const Point2f* backwardMotionRow = backwardMotion.ptr<Point2f>(y);
+ Point2f* forwardMapRow = forwardMap.ptr<Point2f>(y);
+ Point2f* backwardMapRow = backwardMap.ptr<Point2f>(y);
+
+ for (int x = 0; x < forwardMotion.cols; ++x)
+ {
+ Point2f base(static_cast<float>(x), static_cast<float>(y));
+
+ forwardMapRow[x] = base + backwardMotionRow[x];
+ backwardMapRow[x] = base + forwardMotionRow[x];
+ }
+ }
+ }
+
+ template <typename T>
+ void upscaleImpl(const Mat& src, Mat& dst, int scale)
+ {
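+ // zero-padded upsampling: each source pixel lands in the top-left corner of
+ // its scale x scale block and the rest stays zero (used below as the Dt operator)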
+ dst.create(src.rows * scale, src.cols * scale, src.type());
+ dst.setTo(Scalar::all(0));
+
+ for (int y = 0, Y = 0; y < src.rows; ++y, Y += scale)
+ {
+ const T* srcRow = src.ptr<T>(y);
+ T* dstRow = dst.ptr<T>(Y);
+
+ for (int x = 0, X = 0; x < src.cols; ++x, X += scale)
+ dstRow[X] = srcRow[x];
+ }
+ }
+
+ void upscale(const Mat& src, Mat& dst, int scale)
+ {
+ typedef void (*func_t)(const Mat& src, Mat& dst, int scale);
+ static const func_t funcs[] =
+ {
+ 0, upscaleImpl<float>, 0, upscaleImpl<Point3f>
+ };
+
+ CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+
+ const func_t func = funcs[src.channels()];
+
+ func(src, dst, scale);
+ }
+
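+ // sign(a - b): the (sub)gradient of the L1 data term used by the
+ // steepest-descent update below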
+ float diffSign(float a, float b)
+ {
+ return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
+ }
+ Point3f diffSign(Point3f a, Point3f b)
+ {
+ return Point3f(
+ a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f,
+ a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f,
+ a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f
+ );
+ }
+
+ void diffSign(const Mat& src1, const Mat& src2, Mat& dst)
+ {
+ const int count = src1.cols * src1.channels();
+
+ dst.create(src1.size(), src1.type());
+
+ for (int y = 0; y < src1.rows; ++y)
+ {
+ const float* src1Ptr = src1.ptr<float>(y);
+ const float* src2Ptr = src2.ptr<float>(y);
+ float* dstPtr = dst.ptr<float>(y);
+
+ for (int x = 0; x < count; ++x)
+ dstPtr[x] = diffSign(src1Ptr[x], src2Ptr[x]);
+ }
+ }
+
- void calcBtvRegularizationImpl(const Mat& src, Mat& dst, int btvKernelSize, const vector<float>& btvWeights)
++ void calcBtvWeights(int btvKernelSize, double alpha, std::vector<float>& btvWeights)
+ {
+ const size_t size = btvKernelSize * btvKernelSize;
+
+ btvWeights.resize(size);
+
+ const int ksize = (btvKernelSize - 1) / 2;
+ const float alpha_f = static_cast<float>(alpha);
+
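+ // the weight of tap (m, l) is alpha^(|m| + |l|), decaying geometrically
+ // with distance from the center pixel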
+ for (int m = 0, ind = 0; m <= ksize; ++m)
+ {
+ for (int l = ksize; l + m >= 0; --l, ++ind)
+ btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l));
+ }
+ }
+
+ template <typename T>
+ struct BtvRegularizationBody : ParallelLoopBody
+ {
+ void operator ()(const Range& range) const;
+
+ Mat src;
+ mutable Mat dst;
+ int ksize;
+ const float* btvWeights;
+ };
+
+ template <typename T>
+ void BtvRegularizationBody<T>::operator ()(const Range& range) const
+ {
+ for (int i = range.start; i < range.end; ++i)
+ {
+ const T* srcRow = src.ptr<T>(i);
+ T* dstRow = dst.ptr<T>(i);
+
+ for(int j = ksize; j < src.cols - ksize; ++j)
+ {
+ const T srcVal = srcRow[j];
+
+ for (int m = 0, ind = 0; m <= ksize; ++m)
+ {
+ const T* srcRow2 = src.ptr<T>(i - m);
+ const T* srcRow3 = src.ptr<T>(i + m);
+
+ for (int l = ksize; l + m >= 0; --l, ++ind)
+ {
+ dstRow[j] += btvWeights[ind] * (diffSign(srcVal, srcRow3[j + l]) - diffSign(srcRow2[j - l], srcVal));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename T>
- void calcBtvRegularization(const Mat& src, Mat& dst, int btvKernelSize, const vector<float>& btvWeights)
++ void calcBtvRegularizationImpl(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights)
+ {
+ dst.create(src.size(), src.type());
+ dst.setTo(Scalar::all(0));
+
+ const int ksize = (btvKernelSize - 1) / 2;
+
+ BtvRegularizationBody<T> body;
+
+ body.src = src;
+ body.dst = dst;
+ body.ksize = ksize;
+ body.btvWeights = &btvWeights[0];
+
+ parallel_for_(Range(ksize, src.rows - ksize), body);
+ }
+
- typedef void (*func_t)(const Mat& src, Mat& dst, int btvKernelSize, const vector<float>& btvWeights);
++ void calcBtvRegularization(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights)
+ {
- void process(const vector<Mat>& src, Mat& dst,
- const vector<Mat>& forwardMotions, const vector<Mat>& backwardMotions,
++ typedef void (*func_t)(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights);
+ static const func_t funcs[] =
+ {
+ 0, calcBtvRegularizationImpl<float>, 0, calcBtvRegularizationImpl<Point3f>
+ };
+
+ const func_t func = funcs[src.channels()];
+
+ func(src, dst, btvKernelSize, btvWeights);
+ }
+
+ class BTVL1_Base
+ {
+ public:
+ BTVL1_Base();
+
- vector<float> btvWeights_;
++ void process(const std::vector<Mat>& src, Mat& dst,
++ const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions,
+ int baseIdx);
+
+ void collectGarbage();
+
+ protected:
+ int scale_;
+ int iterations_;
+ double tau_;
+ double lambda_;
+ double alpha_;
+ int btvKernelSize_;
+ int blurKernelSize_;
+ double blurSigma_;
+ Ptr<DenseOpticalFlowExt> opticalFlow_;
+
+ private:
+ Ptr<FilterEngine> filter_;
+ int curBlurKernelSize_;
+ double curBlurSigma_;
+ int curSrcType_;
+
- vector<Mat> lowResForwardMotions_;
- vector<Mat> lowResBackwardMotions_;
++ std::vector<float> btvWeights_;
+ int curBtvKernelSize_;
+ double curAlpha_;
+
- vector<Mat> highResForwardMotions_;
- vector<Mat> highResBackwardMotions_;
++ std::vector<Mat> lowResForwardMotions_;
++ std::vector<Mat> lowResBackwardMotions_;
+
- vector<Mat> forwardMaps_;
- vector<Mat> backwardMaps_;
++ std::vector<Mat> highResForwardMotions_;
++ std::vector<Mat> highResBackwardMotions_;
+
- void BTVL1_Base::process(const vector<Mat>& src, Mat& dst, const vector<Mat>& forwardMotions, const vector<Mat>& backwardMotions, int baseIdx)
++ std::vector<Mat> forwardMaps_;
++ std::vector<Mat> backwardMaps_;
+
+ Mat highRes_;
+
+ Mat diffTerm_, regTerm_;
+ Mat a_, b_, c_;
+ };
+
+ BTVL1_Base::BTVL1_Base()
+ {
+ scale_ = 4;
+ iterations_ = 180;
+ lambda_ = 0.03;
+ tau_ = 1.3;
+ alpha_ = 0.7;
+ btvKernelSize_ = 7;
+ blurKernelSize_ = 5;
+ blurSigma_ = 0.0;
+ opticalFlow_ = createOptFlow_Farneback();
+
+ curBlurKernelSize_ = -1;
+ curBlurSigma_ = -1.0;
+ curSrcType_ = -1;
+
+ curBtvKernelSize_ = -1;
+ curAlpha_ = -1.0;
+ }
+
- vector<Mat> frames_;
- vector<Mat> forwardMotions_;
- vector<Mat> backwardMotions_;
- vector<Mat> outputs_;
++ void BTVL1_Base::process(const std::vector<Mat>& src, Mat& dst, const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions, int baseIdx)
+ {
+ CV_Assert( scale_ > 1 );
+ CV_Assert( iterations_ > 0 );
+ CV_Assert( tau_ > 0.0 );
+ CV_Assert( alpha_ > 0.0 );
+ CV_Assert( btvKernelSize_ > 0 );
+ CV_Assert( blurKernelSize_ > 0 );
+ CV_Assert( blurSigma_ >= 0.0 );
+
+ // update blur filter and btv weights
+
+ if (filter_.empty() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
+ {
+ filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
+ curBlurKernelSize_ = blurKernelSize_;
+ curBlurSigma_ = blurSigma_;
+ curSrcType_ = src[0].type();
+ }
+
+ if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
+ {
+ calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
+ curBtvKernelSize_ = btvKernelSize_;
+ curAlpha_ = alpha_;
+ }
+
+ // calc high res motions
+
+ calcRelativeMotions(forwardMotions, backwardMotions, lowResForwardMotions_, lowResBackwardMotions_, baseIdx, src[0].size());
+
+ upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);
+ upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_);
+
+ forwardMaps_.resize(highResForwardMotions_.size());
+ backwardMaps_.resize(highResForwardMotions_.size());
+ for (size_t i = 0; i < highResForwardMotions_.size(); ++i)
+ buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);
+
+ // initial estimation
+
+ const Size lowResSize = src[0].size();
+ const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
+
+ resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);
+
+ // iterations
+
+ diffTerm_.create(highResSize, highRes_.type());
+ a_.create(highResSize, highRes_.type());
+ b_.create(highResSize, highRes_.type());
+ c_.create(lowResSize, highRes_.type());
+
+ for (int i = 0; i < iterations_; ++i)
+ {
+ diffTerm_.setTo(Scalar::all(0));
+
+ for (size_t k = 0; k < src.size(); ++k)
+ {
+ // a = M * Ih
+ remap(highRes_, a_, backwardMaps_[k], noArray(), INTER_NEAREST);
+ // b = HM * Ih
+ filter_->apply(a_, b_);
+ // c = DHM * Ih
+ resize(b_, c_, lowResSize, 0, 0, INTER_NEAREST);
+
+ diffSign(src[k], c_, c_);
+
+ // a = Dt * diff
+ upscale(c_, a_, scale_);
+ // b = HtDt * diff
+ filter_->apply(a_, b_);
+ // a = MtHtDt * diff
+ remap(b_, a_, forwardMaps_[k], noArray(), INTER_NEAREST);
+
+ add(diffTerm_, a_, diffTerm_);
+ }
+
+ if (lambda_ > 0)
+ {
+ calcBtvRegularization(highRes_, regTerm_, btvKernelSize_, btvWeights_);
+ addWeighted(diffTerm_, 1.0, regTerm_, -lambda_, 0.0, diffTerm_);
+ }
+
+ addWeighted(highRes_, 1.0, diffTerm_, tau_, 0.0, highRes_);
+ }
+
+ Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);
+ highRes_(inner).copyTo(dst);
+ }
+
+ void BTVL1_Base::collectGarbage()
+ {
+ filter_.release();
+
+ lowResForwardMotions_.clear();
+ lowResBackwardMotions_.clear();
+
+ highResForwardMotions_.clear();
+ highResBackwardMotions_.clear();
+
+ forwardMaps_.clear();
+ backwardMaps_.clear();
+
+ highRes_.release();
+
+ diffTerm_.release();
+ regTerm_.release();
+ a_.release();
+ b_.release();
+ c_.release();
+ }
+
+ ////////////////////////////////////////////////////////////////////
+
+ class BTVL1 : public SuperResolution, private BTVL1_Base
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ BTVL1();
+
+ void collectGarbage();
+
+ protected:
+ void initImpl(Ptr<FrameSource>& frameSource);
+ void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
+
+ private:
+ int temporalAreaRadius_;
+
+ void readNextFrame(Ptr<FrameSource>& frameSource);
+ void processFrame(int idx);
+
+ Mat curFrame_;
+ Mat prevFrame_;
+
- vector<Mat> srcFrames_;
- vector<Mat> srcForwardMotions_;
- vector<Mat> srcBackwardMotions_;
++ std::vector<Mat> frames_;
++ std::vector<Mat> forwardMotions_;
++ std::vector<Mat> backwardMotions_;
++ std::vector<Mat> outputs_;
+
+ int storePos_;
+ int procPos_;
+ int outPos_;
+
- const int startIdx = max(idx - temporalAreaRadius_, 0);
++ std::vector<Mat> srcFrames_;
++ std::vector<Mat> srcForwardMotions_;
++ std::vector<Mat> srcBackwardMotions_;
+ Mat finalOutput_;
+ };
+
+ CV_INIT_ALGORITHM(BTVL1, "SuperResolution.BTVL1",
+ obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor.");
+ obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count.");
+ obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method.");
+ obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term.");
+ obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV.");
+ obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter.");
+ obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size.");
+ obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma.");
+ obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area.");
+ obj.info()->addParam<DenseOpticalFlowExt>(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm."));
+
+ BTVL1::BTVL1()
+ {
+ temporalAreaRadius_ = 4;
+ }
+
+ void BTVL1::collectGarbage()
+ {
+ curFrame_.release();
+ prevFrame_.release();
+
+ frames_.clear();
+ forwardMotions_.clear();
+ backwardMotions_.clear();
+ outputs_.clear();
+
+ srcFrames_.clear();
+ srcForwardMotions_.clear();
+ srcBackwardMotions_.clear();
+ finalOutput_.release();
+
+ SuperResolution::collectGarbage();
+ BTVL1_Base::collectGarbage();
+ }
+
+ void BTVL1::initImpl(Ptr<FrameSource>& frameSource)
+ {
+ const int cacheSize = 2 * temporalAreaRadius_ + 1;
+
+ frames_.resize(cacheSize);
+ forwardMotions_.resize(cacheSize);
+ backwardMotions_.resize(cacheSize);
+ outputs_.resize(cacheSize);
+
+ storePos_ = -1;
+
+ for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
+ readNextFrame(frameSource);
+
+ for (int i = 0; i <= temporalAreaRadius_; ++i)
+ processFrame(i);
+
+ procPos_ = temporalAreaRadius_;
+ outPos_ = -1;
+ }
+
+ void BTVL1::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)
+ {
+ if (outPos_ >= storePos_)
+ {
+ _output.release();
+ return;
+ }
+
+ readNextFrame(frameSource);
+
+ if (procPos_ < storePos_)
+ {
+ ++procPos_;
+ processFrame(procPos_);
+ }
+
+ ++outPos_;
+ const Mat& curOutput = at(outPos_, outputs_);
+
+ if (_output.kind() < _InputArray::OPENGL_BUFFER)
+ curOutput.convertTo(_output, CV_8U);
+ else
+ {
+ curOutput.convertTo(finalOutput_, CV_8U);
+ arrCopy(finalOutput_, _output);
+ }
+ }
+
+ void BTVL1::readNextFrame(Ptr<FrameSource>& frameSource)
+ {
+ frameSource->nextFrame(curFrame_);
+
+ if (curFrame_.empty())
+ return;
+
+ ++storePos_;
+ curFrame_.convertTo(at(storePos_, frames_), CV_32F);
+
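+ // Convention: forwardMotions_[i] holds the flow i -> i+1 and backwardMotions_[i]
+ // the flow i -> i-1, so the newest frame has no forward motion yet and the first
+ // frame never gets a backward motion (see processFrame).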
+ if (storePos_ > 0)
+ {
+ opticalFlow_->calc(prevFrame_, curFrame_, at(storePos_ - 1, forwardMotions_));
+ opticalFlow_->calc(curFrame_, prevFrame_, at(storePos_, backwardMotions_));
+ }
+
+ curFrame_.copyTo(prevFrame_);
+ }
+
+ void BTVL1::processFrame(int idx)
+ {
- const int startIdx = max(idx - temporalAreaRadius_, 0);
+ const int startIdx = std::max(idx - temporalAreaRadius_, 0);
+ const int procIdx = idx;
- const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_);
+ const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_);
+
+ const int count = endIdx - startIdx + 1;
+
+ srcFrames_.resize(count);
+ srcForwardMotions_.resize(count);
+ srcBackwardMotions_.resize(count);
+
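+ // Near the sequence boundaries the window is clamped, so idx need not be its
+ // centre; baseIdx records where idx landed inside the gathered window.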
+ int baseIdx = -1;
+
+ for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)
+ {
+ if (i == procIdx)
+ baseIdx = k;
+
+ srcFrames_[k] = at(i, frames_);
+
+ if (i < endIdx)
+ srcForwardMotions_[k] = at(i, forwardMotions_);
+ if (i > startIdx)
+ srcBackwardMotions_[k] = at(i, backwardMotions_);
+ }
+
+ process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx);
+ }
+ }
+
+ Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1()
+ {
+ return new BTVL1;
+ }
--- /dev/null
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ // S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.
+ // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
+
+ #include "precomp.hpp"
+
-using namespace std;
+ using namespace cv;
+ using namespace cv::gpu;
+ using namespace cv::superres;
+ using namespace cv::superres::detail;
+
+ #if !defined(HAVE_CUDA) || !defined(HAVE_OPENCV_GPU)
+
+ Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_GPU()
+ {
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<SuperResolution>();
+ }
+
+ #else // HAVE_CUDA
+
+ namespace btv_l1_device
+ {
+ void buildMotionMaps(PtrStepSzf forwardMotionX, PtrStepSzf forwardMotionY,
+ PtrStepSzf backwardMotionX, PtrStepSzf backwardMotionY,
+ PtrStepSzf forwardMapX, PtrStepSzf forwardMapY,
+ PtrStepSzf backwardMapX, PtrStepSzf backwardMapY);
+
+ template <int cn>
+ void upscale(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);
+
+ void diffSign(PtrStepSzf src1, PtrStepSzf src2, PtrStepSzf dst, cudaStream_t stream);
+
+ void loadBtvWeights(const float* weights, size_t count);
+ template <int cn> void calcBtvRegularization(PtrStepSzb src, PtrStepSzb dst, int ksize);
+ }
+
+ namespace
+ {
+ void calcRelativeMotions(const vector<pair<GpuMat, GpuMat> >& forwardMotions, const vector<pair<GpuMat, GpuMat> >& backwardMotions,
+ vector<pair<GpuMat, GpuMat> >& relForwardMotions, vector<pair<GpuMat, GpuMat> >& relBackwardMotions,
+ int baseIdx, Size size)
+ {
+ const int count = static_cast<int>(forwardMotions.size());
+
+ relForwardMotions.resize(count);
+ relForwardMotions[baseIdx].first.create(size, CV_32FC1);
+ relForwardMotions[baseIdx].first.setTo(Scalar::all(0));
+ relForwardMotions[baseIdx].second.create(size, CV_32FC1);
+ relForwardMotions[baseIdx].second.setTo(Scalar::all(0));
+
+ relBackwardMotions.resize(count);
+ relBackwardMotions[baseIdx].first.create(size, CV_32FC1);
+ relBackwardMotions[baseIdx].first.setTo(Scalar::all(0));
+ relBackwardMotions[baseIdx].second.create(size, CV_32FC1);
+ relBackwardMotions[baseIdx].second.setTo(Scalar::all(0));
+
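+ // Compose the pairwise flows into flows relative to the base frame by simple
+ // accumulation; re-warping of the intermediate flow fields is ignored, which is
+ // presumably acceptable for the small inter-frame motions assumed here.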
+ for (int i = baseIdx - 1; i >= 0; --i)
+ {
+ gpu::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);
+ gpu::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);
+
+ gpu::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);
+ gpu::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);
+ }
+
+ for (int i = baseIdx + 1; i < count; ++i)
+ {
+ gpu::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);
+ gpu::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);
+
+ gpu::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);
+ gpu::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);
+ }
+ }
+
+ void upscaleMotions(const vector<pair<GpuMat, GpuMat> >& lowResMotions, vector<pair<GpuMat, GpuMat> >& highResMotions, int scale)
+ {
+ highResMotions.resize(lowResMotions.size());
+
+ for (size_t i = 0; i < lowResMotions.size(); ++i)
+ {
+ gpu::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_CUBIC);
+ gpu::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_CUBIC);
+
+ gpu::multiply(highResMotions[i].first, Scalar::all(scale), highResMotions[i].first);
+ gpu::multiply(highResMotions[i].second, Scalar::all(scale), highResMotions[i].second);
+ }
+ }
+
+ void buildMotionMaps(const pair<GpuMat, GpuMat>& forwardMotion, const pair<GpuMat, GpuMat>& backwardMotion,
+ pair<GpuMat, GpuMat>& forwardMap, pair<GpuMat, GpuMat>& backwardMap)
+ {
+ forwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
+ forwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
+
+ backwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
+ backwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
+
+ btv_l1_device::buildMotionMaps(forwardMotion.first, forwardMotion.second,
+ backwardMotion.first, backwardMotion.second,
+ forwardMap.first, forwardMap.second,
+ backwardMap.first, backwardMap.second);
+ }
+
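+ // Upsampling operator Dt: scatter each low-res pixel into a zero-filled grid
+ // `scale` times larger, i.e. the transpose of plain decimation.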
+ void upscale(const GpuMat& src, GpuMat& dst, int scale, Stream& stream)
+ {
+ typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);
+ static const func_t funcs[] =
+ {
+ 0, btv_l1_device::upscale<1>, 0, btv_l1_device::upscale<3>, btv_l1_device::upscale<4>
+ };
+
+ CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+
+ dst.create(src.rows * scale, src.cols * scale, src.type());
+ dst.setTo(Scalar::all(0));
+
+ const func_t func = funcs[src.channels()];
+
+ func(src, dst, scale, StreamAccessor::getStream(stream));
+ }
+
+ void diffSign(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
+ {
+ dst.create(src1.size(), src1.type());
+
+ btv_l1_device::diffSign(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
+ }
+
+ void calcBtvWeights(int btvKernelSize, double alpha, vector<float>& btvWeights)
+ {
+ const size_t size = btvKernelSize * btvKernelSize;
+
+ btvWeights.resize(size);
+
+ const int ksize = (btvKernelSize - 1) / 2;
+ const float alpha_f = static_cast<float>(alpha);
+
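+ // Bilateral-TV weights w(m, l) = alpha^(|m| + |l|), enumerated over the
+ // half-plane m >= 0, l + m >= 0 (the opposite half is symmetric).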
+ for (int m = 0, ind = 0; m <= ksize; ++m)
+ {
+ for (int l = ksize; l + m >= 0; --l, ++ind)
+ btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l));
+ }
+
+ btv_l1_device::loadBtvWeights(&btvWeights[0], size);
+ }
+
+ void calcBtvRegularization(const GpuMat& src, GpuMat& dst, int btvKernelSize)
+ {
+ typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int ksize);
+ static const func_t funcs[] =
+ {
+ 0,
+ btv_l1_device::calcBtvRegularization<1>,
+ 0,
+ btv_l1_device::calcBtvRegularization<3>,
+ btv_l1_device::calcBtvRegularization<4>
+ };
+
+ dst.create(src.size(), src.type());
+ dst.setTo(Scalar::all(0));
+
+ const int ksize = (btvKernelSize - 1) / 2;
+
+ funcs[src.channels()](src, dst, ksize);
+ }
+
+ class BTVL1_GPU_Base
+ {
+ public:
+ BTVL1_GPU_Base();
+
+ void process(const vector<GpuMat>& src, GpuMat& dst,
+ const vector<pair<GpuMat, GpuMat> >& forwardMotions, const vector<pair<GpuMat, GpuMat> >& backwardMotions,
+ int baseIdx);
+
+ void collectGarbage();
+
+ protected:
+ int scale_;
+ int iterations_;
+ double lambda_;
+ double tau_;
+ double alpha_;
+ int btvKernelSize_;
+ int blurKernelSize_;
+ double blurSigma_;
+ Ptr<DenseOpticalFlowExt> opticalFlow_;
+
+ private:
+ vector<Ptr<FilterEngine_GPU> > filters_;
+ int curBlurKernelSize_;
+ double curBlurSigma_;
+ int curSrcType_;
+
+ vector<float> btvWeights_;
+ int curBtvKernelSize_;
+ double curAlpha_;
+
+ vector<pair<GpuMat, GpuMat> > lowResForwardMotions_;
+ vector<pair<GpuMat, GpuMat> > lowResBackwardMotions_;
+
+ vector<pair<GpuMat, GpuMat> > highResForwardMotions_;
+ vector<pair<GpuMat, GpuMat> > highResBackwardMotions_;
+
+ vector<pair<GpuMat, GpuMat> > forwardMaps_;
+ vector<pair<GpuMat, GpuMat> > backwardMaps_;
+
+ GpuMat highRes_;
+
+ vector<Stream> streams_;
+ vector<GpuMat> diffTerms_;
+ vector<GpuMat> a_, b_, c_;
+ GpuMat regTerm_;
+ };
+
+ BTVL1_GPU_Base::BTVL1_GPU_Base()
+ {
+ scale_ = 4;
+ iterations_ = 180;
+ lambda_ = 0.03;
+ tau_ = 1.3;
+ alpha_ = 0.7;
+ btvKernelSize_ = 7;
+ blurKernelSize_ = 5;
+ blurSigma_ = 0.0;
+ opticalFlow_ = createOptFlow_Farneback_GPU();
+
+ curBlurKernelSize_ = -1;
+ curBlurSigma_ = -1.0;
+ curSrcType_ = -1;
+
+ curBtvKernelSize_ = -1;
+ curAlpha_ = -1.0;
+ }
+
+ void BTVL1_GPU_Base::process(const vector<GpuMat>& src, GpuMat& dst,
+ const vector<pair<GpuMat, GpuMat> >& forwardMotions, const vector<pair<GpuMat, GpuMat> >& backwardMotions,
+ int baseIdx)
+ {
+ CV_Assert( scale_ > 1 );
+ CV_Assert( iterations_ > 0 );
+ CV_Assert( tau_ > 0.0 );
+ CV_Assert( alpha_ > 0.0 );
+ CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 );
+ CV_Assert( blurKernelSize_ > 0 );
+ CV_Assert( blurSigma_ >= 0.0 );
+
+ // update blur filter and btv weights
+
+ if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
+ {
+ filters_.resize(src.size());
+ for (size_t i = 0; i < src.size(); ++i)
+ filters_[i] = createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
+ curBlurKernelSize_ = blurKernelSize_;
+ curBlurSigma_ = blurSigma_;
+ curSrcType_ = src[0].type();
+ }
+
+ if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
+ {
+ calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
+ curBtvKernelSize_ = btvKernelSize_;
+ curAlpha_ = alpha_;
+ }
+
+ // calc motions between input frames
+
+ calcRelativeMotions(forwardMotions, backwardMotions, lowResForwardMotions_, lowResBackwardMotions_, baseIdx, src[0].size());
+
+ upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);
+ upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_);
+
+ forwardMaps_.resize(highResForwardMotions_.size());
+ backwardMaps_.resize(highResForwardMotions_.size());
+ for (size_t i = 0; i < highResForwardMotions_.size(); ++i)
+ buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);
+
+ // initial estimation
+
+ const Size lowResSize = src[0].size();
+ const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
+
+ gpu::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);
+
+ // iterations
+
+ streams_.resize(src.size());
+ diffTerms_.resize(src.size());
+ a_.resize(src.size());
+ b_.resize(src.size());
+ c_.resize(src.size());
+
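+ // Steepest-descent update, with the per-frame terms computed on separate streams:
+ // Ih <- Ih + tau * ( sum_k Mk^T H^T D^T sign(Ik - D H Mk Ih) - lambda * dBTV(Ih) )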
+ for (int i = 0; i < iterations_; ++i)
+ {
+ for (size_t k = 0; k < src.size(); ++k)
+ {
+ // a = M * Ih
+ gpu::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
+ // b = HM * Ih
+ filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1), streams_[k]);
+ // c = DHF * Ih
+ gpu::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST, streams_[k]);
+
+ diffSign(src[k], c_[k], c_[k], streams_[k]);
+
+ // a = Dt * diff
+ upscale(c_[k], a_[k], scale_, streams_[k]);
+ // b = HtDt * diff
+ filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1), streams_[k]);
+ // diffTerm = MtHtDt * diff
+ gpu::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
+ }
+
+ if (lambda_ > 0)
+ {
+ calcBtvRegularization(highRes_, regTerm_, btvKernelSize_);
+ gpu::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);
+ }
+
+ for (size_t k = 0; k < src.size(); ++k)
+ {
+ streams_[k].waitForCompletion();
+ gpu::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);
+ }
+ }
+
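+ // Crop a btvKernelSize_-wide border, where the regularizer had incomplete support.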
+ Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);
+ highRes_(inner).copyTo(dst);
+ }
+
+ void BTVL1_GPU_Base::collectGarbage()
+ {
+ filters_.clear();
+
+ lowResForwardMotions_.clear();
+ lowResBackwardMotions_.clear();
+
+ highResForwardMotions_.clear();
+ highResBackwardMotions_.clear();
+
+ forwardMaps_.clear();
+ backwardMaps_.clear();
+
+ highRes_.release();
+
+ diffTerms_.clear();
+ a_.clear();
+ b_.clear();
+ c_.clear();
+ regTerm_.release();
+ }
+
+ ////////////////////////////////////////////////////////////
+
+ class BTVL1_GPU : public SuperResolution, private BTVL1_GPU_Base
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ BTVL1_GPU();
+
+ void collectGarbage();
+
+ protected:
+ void initImpl(Ptr<FrameSource>& frameSource);
+ void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
+
+ private:
+ int temporalAreaRadius_;
+
+ void readNextFrame(Ptr<FrameSource>& frameSource);
+ void processFrame(int idx);
+
+ GpuMat curFrame_;
+ GpuMat prevFrame_;
+
+ vector<GpuMat> frames_;
+ vector<pair<GpuMat, GpuMat> > forwardMotions_;
+ vector<pair<GpuMat, GpuMat> > backwardMotions_;
+ vector<GpuMat> outputs_;
+
+ int storePos_;
+ int procPos_;
+ int outPos_;
+
+ vector<GpuMat> srcFrames_;
+ vector<pair<GpuMat, GpuMat> > srcForwardMotions_;
+ vector<pair<GpuMat, GpuMat> > srcBackwardMotions_;
+ GpuMat finalOutput_;
+ };
+
+ CV_INIT_ALGORITHM(BTVL1_GPU, "SuperResolution.BTVL1_GPU",
+ obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor.");
+ obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count.");
+ obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method.");
+ obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term.");
+ obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV.");
+ obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter.");
+ obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size.");
+ obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma.");
+ obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area.");
+ obj.info()->addParam<DenseOpticalFlowExt>(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm."));
+
+ BTVL1_GPU::BTVL1_GPU()
+ {
+ temporalAreaRadius_ = 4;
+ }
+
+ void BTVL1_GPU::collectGarbage()
+ {
+ curFrame_.release();
+ prevFrame_.release();
+
+ frames_.clear();
+ forwardMotions_.clear();
+ backwardMotions_.clear();
+ outputs_.clear();
+
+ srcFrames_.clear();
+ srcForwardMotions_.clear();
+ srcBackwardMotions_.clear();
+ finalOutput_.release();
+
+ SuperResolution::collectGarbage();
+ BTVL1_GPU_Base::collectGarbage();
+ }
+
+ void BTVL1_GPU::initImpl(Ptr<FrameSource>& frameSource)
+ {
+ const int cacheSize = 2 * temporalAreaRadius_ + 1;
+
+ frames_.resize(cacheSize);
+ forwardMotions_.resize(cacheSize);
+ backwardMotions_.resize(cacheSize);
+ outputs_.resize(cacheSize);
+
+ storePos_ = -1;
+
+ for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
+ readNextFrame(frameSource);
+
+ for (int i = 0; i <= temporalAreaRadius_; ++i)
+ processFrame(i);
+
+ procPos_ = temporalAreaRadius_;
+ outPos_ = -1;
+ }
+
+ void BTVL1_GPU::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)
+ {
+ if (outPos_ >= storePos_)
+ {
+ _output.release();
+ return;
+ }
+
+ readNextFrame(frameSource);
+
+ if (procPos_ < storePos_)
+ {
+ ++procPos_;
+ processFrame(procPos_);
+ }
+
+ ++outPos_;
+ const GpuMat& curOutput = at(outPos_, outputs_);
+
+ if (_output.kind() == _InputArray::GPU_MAT)
+ curOutput.convertTo(_output.getGpuMatRef(), CV_8U);
+ else
+ {
+ curOutput.convertTo(finalOutput_, CV_8U);
+ arrCopy(finalOutput_, _output);
+ }
+ }
+
+ void BTVL1_GPU::readNextFrame(Ptr<FrameSource>& frameSource)
+ {
+ frameSource->nextFrame(curFrame_);
+
+ if (curFrame_.empty())
+ return;
+
+ ++storePos_;
+ curFrame_.convertTo(at(storePos_, frames_), CV_32F);
+
+ if (storePos_ > 0)
+ {
+ pair<GpuMat, GpuMat>& forwardMotion = at(storePos_ - 1, forwardMotions_);
+ pair<GpuMat, GpuMat>& backwardMotion = at(storePos_, backwardMotions_);
+
+ opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second);
+ opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second);
+ }
+
+ curFrame_.copyTo(prevFrame_);
+ }
+
+ void BTVL1_GPU::processFrame(int idx)
+ {
+ const int startIdx = max(idx - temporalAreaRadius_, 0);
+ const int procIdx = idx;
+ const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_);
+
+ const int count = endIdx - startIdx + 1;
+
+ srcFrames_.resize(count);
+ srcForwardMotions_.resize(count);
+ srcBackwardMotions_.resize(count);
+
+ int baseIdx = -1;
+
+ for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)
+ {
+ if (i == procIdx)
+ baseIdx = k;
+
+ srcFrames_[k] = at(i, frames_);
+
+ if (i < endIdx)
+ srcForwardMotions_[k] = at(i, forwardMotions_);
+ if (i > startIdx)
+ srcBackwardMotions_[k] = at(i, backwardMotions_);
+ }
+
+ process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx);
+ }
+ }
+
+ Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_GPU()
+ {
+ return new BTVL1_GPU;
+ }
+
+ #endif // HAVE_CUDA
--- /dev/null
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+
-using namespace std;
+ using namespace cv;
+ using namespace cv::gpu;
+ using namespace cv::superres;
+ using namespace cv::superres::detail;
+
+ cv::superres::FrameSource::~FrameSource()
+ {
+ }
+
+ //////////////////////////////////////////////////////
+ // EmptyFrameSource
+
+ namespace
+ {
+ class EmptyFrameSource : public FrameSource
+ {
+ public:
+ void nextFrame(OutputArray frame);
+ void reset();
+ };
+
+ void EmptyFrameSource::nextFrame(OutputArray frame)
+ {
+ frame.release();
+ }
+
+ void EmptyFrameSource::reset()
+ {
+ }
+ }
+
+ Ptr<FrameSource> cv::superres::createFrameSource_Empty()
+ {
+ return new EmptyFrameSource;
+ }
+
+ //////////////////////////////////////////////////////
+ // VideoFrameSource & CameraFrameSource
+
+ #ifndef HAVE_OPENCV_HIGHGUI
+
-Ptr<FrameSource> cv::superres::createFrameSource_Video(const string& fileName)
+Ptr<FrameSource> cv::superres::createFrameSource_Video(const std::string& fileName)
+ {
+ (void) fileName;
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<FrameSource>();
+ }
+
+ Ptr<FrameSource> cv::superres::createFrameSource_Camera(int deviceId)
+ {
+ (void) deviceId;
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<FrameSource>();
+ }
+
+ #else // HAVE_OPENCV_HIGHGUI
+
+ namespace
+ {
+ class CaptureFrameSource : public FrameSource
+ {
+ public:
+ void nextFrame(OutputArray frame);
+
+ protected:
+ VideoCapture vc_;
+
+ private:
+ Mat frame_;
+ };
+
+ void CaptureFrameSource::nextFrame(OutputArray _frame)
+ {
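+ // Capture straight into the caller's Mat when possible; otherwise read into an
+ // intermediate buffer and let arrCopy() convert (e.g. to GpuMat or OpenGL).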
+ if (_frame.kind() == _InputArray::MAT)
+ {
+ vc_ >> _frame.getMatRef();
+ }
+ else
+ {
+ vc_ >> frame_;
+ arrCopy(frame_, _frame);
+ }
+ }
+
+ class VideoFrameSource : public CaptureFrameSource
+ {
+ public:
- VideoFrameSource(const string& fileName);
+ VideoFrameSource(const std::string& fileName);
+
+ void reset();
+
+ private:
- string fileName_;
+ std::string fileName_;
+ };
+
- VideoFrameSource::VideoFrameSource(const string& fileName) : fileName_(fileName)
+ VideoFrameSource::VideoFrameSource(const std::string& fileName) : fileName_(fileName)
+ {
+ reset();
+ }
+
+ void VideoFrameSource::reset()
+ {
+ vc_.release();
+ vc_.open(fileName_);
+ CV_Assert( vc_.isOpened() );
+ }
+
+ class CameraFrameSource : public CaptureFrameSource
+ {
+ public:
+ CameraFrameSource(int deviceId);
+
+ void reset();
+
+ private:
+ int deviceId_;
+ };
+
+ CameraFrameSource::CameraFrameSource(int deviceId) : deviceId_(deviceId)
+ {
+ reset();
+ }
+
+ void CameraFrameSource::reset()
+ {
+ vc_.release();
+ vc_.open(deviceId_);
+ CV_Assert( vc_.isOpened() );
+ }
+ }
+
-Ptr<FrameSource> cv::superres::createFrameSource_Video(const string& fileName)
+Ptr<FrameSource> cv::superres::createFrameSource_Video(const std::string& fileName)
+ {
+ return new VideoFrameSource(fileName);
+ }
+
+ Ptr<FrameSource> cv::superres::createFrameSource_Camera(int deviceId)
+ {
+ return new CameraFrameSource(deviceId);
+ }
+
+ #endif // HAVE_OPENCV_HIGHGUI
+
+ //////////////////////////////////////////////////////
+ // VideoFrameSource_GPU
+
+ #ifndef HAVE_OPENCV_GPU
+
-Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const string& fileName)
+Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const std::string& fileName)
+ {
+ (void) fileName;
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<FrameSource>();
+ }
+
+ #else // HAVE_OPENCV_GPU
+
+ namespace
+ {
+ class VideoFrameSource_GPU : public FrameSource
+ {
+ public:
- VideoFrameSource_GPU(const string& fileName);
+ VideoFrameSource_GPU(const std::string& fileName);
+
+ void nextFrame(OutputArray frame);
+ void reset();
+
+ private:
- string fileName_;
+ std::string fileName_;
+ VideoReader_GPU reader_;
+ GpuMat frame_;
+ };
+
- VideoFrameSource_GPU::VideoFrameSource_GPU(const string& fileName) : fileName_(fileName)
+ VideoFrameSource_GPU::VideoFrameSource_GPU(const std::string& fileName) : fileName_(fileName)
+ {
+ reset();
+ }
+
+ void VideoFrameSource_GPU::nextFrame(OutputArray _frame)
+ {
+ if (_frame.kind() == _InputArray::GPU_MAT)
+ {
+ bool res = reader_.read(_frame.getGpuMatRef());
+ if (!res)
+ _frame.release();
+ }
+ else
+ {
+ bool res = reader_.read(frame_);
+ if (!res)
+ _frame.release();
+ else
+ arrCopy(frame_, _frame);
+ }
+ }
+
+ void VideoFrameSource_GPU::reset()
+ {
+ reader_.close();
+ reader_.open(fileName_);
+ CV_Assert( reader_.isOpened() );
+ }
+ }
+
-Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const string& fileName)
+Ptr<FrameSource> cv::superres::createFrameSource_Video_GPU(const std::string& fileName)
+ {
+ return new VideoFrameSource_GPU(fileName);
+ }
+
+ #endif // HAVE_OPENCV_GPU
--- /dev/null
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+
-using namespace std;
+ using namespace cv;
+ using namespace cv::gpu;
+
+ Mat cv::superres::arrGetMat(InputArray arr, Mat& buf)
+ {
+ switch (arr.kind())
+ {
+ case _InputArray::GPU_MAT:
+ arr.getGpuMat().download(buf);
+ return buf;
+
+ case _InputArray::OPENGL_BUFFER:
+ arr.getOGlBuffer().copyTo(buf);
+ return buf;
+
+ case _InputArray::OPENGL_TEXTURE:
+ arr.getOGlTexture2D().copyTo(buf);
+ return buf;
+
+ default:
+ return arr.getMat();
+ }
+ }
+
+ GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf)
+ {
+ switch (arr.kind())
+ {
+ case _InputArray::GPU_MAT:
+ return arr.getGpuMat();
+
+ case _InputArray::OPENGL_BUFFER:
+ arr.getOGlBuffer().copyTo(buf);
+ return buf;
+
+ case _InputArray::OPENGL_TEXTURE:
+ arr.getOGlTexture2D().copyTo(buf);
+ return buf;
+
+ default:
+ buf.upload(arr.getMat());
+ return buf;
+ }
+ }
+
+ namespace
+ {
+ void mat2mat(InputArray src, OutputArray dst)
+ {
+ src.getMat().copyTo(dst);
+ }
+ void arr2buf(InputArray src, OutputArray dst)
+ {
+ dst.getOGlBufferRef().copyFrom(src);
+ }
+ void arr2tex(InputArray src, OutputArray dst)
+ {
+ dst.getOGlTexture2D().copyFrom(src);
+ }
+ void mat2gpu(InputArray src, OutputArray dst)
+ {
+ dst.getGpuMatRef().upload(src.getMat());
+ }
+ void buf2arr(InputArray src, OutputArray dst)
+ {
+ src.getOGlBuffer().copyTo(dst);
+ }
+ void tex2arr(InputArray src, OutputArray dst)
+ {
+ src.getOGlTexture2D().copyTo(dst);
+ }
+ void gpu2mat(InputArray src, OutputArray dst)
+ {
+ GpuMat d = src.getGpuMat();
+ dst.create(d.size(), d.type());
+ Mat m = dst.getMat();
+ d.download(m);
+ }
+ void gpu2gpu(InputArray src, OutputArray dst)
+ {
+ src.getGpuMat().copyTo(dst.getGpuMatRef());
+ }
+ }
+
+ void cv::superres::arrCopy(InputArray src, OutputArray dst)
+ {
+ typedef void (*func_t)(InputArray src, OutputArray dst);
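+ // Dispatch on (src kind, dst kind): indices 1-6 are the Mat-backed kinds,
+ // 7 OPENGL_BUFFER, 8 OPENGL_TEXTURE, 9 GPU_MAT (assuming _InputArray's KIND
+ // enum ordering); entry 0 is the NONE kind.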
+ static const func_t funcs[10][10] =
+ {
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
+ {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr},
+ {0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr},
+ {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu}
+ };
+
+ const int src_kind = src.kind() >> _InputArray::KIND_SHIFT;
+ const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT;
+
+ CV_DbgAssert( src_kind >= 0 && src_kind < 10 );
+ CV_DbgAssert( dst_kind >= 0 && dst_kind < 10 );
+
+ const func_t func = funcs[src_kind][dst_kind];
+ CV_DbgAssert( func != 0 );
+
+ func(src, dst);
+ }
+
+ namespace
+ {
+ void convertToCn(InputArray src, OutputArray dst, int cn)
+ {
+ CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
+ CV_Assert( cn == 1 || cn == 3 || cn == 4 );
+
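+ // codes[srcCn][dstCn] is the cvtColor conversion code; -1 marks pairs that
+ // cannot occur given the asserts above.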
+ static const int codes[5][5] =
+ {
+ {-1, -1, -1, -1, -1},
+ {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
+ {-1, -1, -1, -1, -1},
+ {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
+ {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1},
+ };
+
+ const int code = codes[src.channels()][cn];
+ CV_DbgAssert( code >= 0 );
+
+ switch (src.kind())
+ {
+ case _InputArray::GPU_MAT:
+ #ifdef HAVE_OPENCV_GPU
+ gpu::cvtColor(src.getGpuMat(), dst.getGpuMatRef(), code, cn);
+ #else
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ #endif
+ break;
+
+ default:
+ cvtColor(src, dst, code, cn);
+ break;
+ }
+ }
+
+ void convertToDepth(InputArray src, OutputArray dst, int depth)
+ {
+ CV_Assert( src.depth() <= CV_64F );
+ CV_Assert( depth == CV_8U || depth == CV_32F );
+
+ static const double maxVals[] =
+ {
- numeric_limits<uchar>::max(),
- numeric_limits<schar>::max(),
- numeric_limits<ushort>::max(),
- numeric_limits<short>::max(),
- numeric_limits<int>::max(),
+ std::numeric_limits<uchar>::max(),
+ std::numeric_limits<schar>::max(),
+ std::numeric_limits<ushort>::max(),
+ std::numeric_limits<short>::max(),
+ std::numeric_limits<int>::max(),
+ 1.0,
+ 1.0,
+ };
+
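+ // Rescale so the nominal value range is preserved, e.g. CV_8U -> CV_32F maps
+ // [0, 255] onto [0, 1].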
+ const double scale = maxVals[depth] / maxVals[src.depth()];
+
+ switch (src.kind())
+ {
+ case _InputArray::GPU_MAT:
+ src.getGpuMat().convertTo(dst.getGpuMatRef(), depth, scale);
+ break;
+
+ default:
+ src.getMat().convertTo(dst, depth, scale);
+ break;
+ }
+ }
+ }
+
+ Mat cv::superres::convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1)
+ {
+ if (src.type() == type)
+ return src;
+
+ const int depth = CV_MAT_DEPTH(type);
+ const int cn = CV_MAT_CN(type);
+
+ if (src.depth() == depth)
+ {
+ convertToCn(src, buf0, cn);
+ return buf0;
+ }
+
+ if (src.channels() == cn)
+ {
+ convertToDepth(src, buf1, depth);
+ return buf1;
+ }
+
+ convertToCn(src, buf0, cn);
+ convertToDepth(buf0, buf1, depth);
+ return buf1;
+ }
+
+ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1)
+ {
+ if (src.type() == type)
+ return src;
+
+ const int depth = CV_MAT_DEPTH(type);
+ const int cn = CV_MAT_CN(type);
+
+ if (src.depth() == depth)
+ {
+ convertToCn(src, buf0, cn);
+ return buf0;
+ }
+
+ if (src.channels() == cn)
+ {
+ convertToDepth(src, buf1, depth);
+ return buf1;
+ }
+
+ convertToCn(src, buf0, cn);
+ convertToDepth(buf0, buf1, depth);
+ return buf1;
+ }
--- /dev/null
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_SUPERRES_INPUT_ARRAY_UTILITY_HPP__
+ #define __OPENCV_SUPERRES_INPUT_ARRAY_UTILITY_HPP__
+
-#include "opencv2/core/core.hpp"
+#include "opencv2/core.hpp"
+ #include "opencv2/core/gpumat.hpp"
+
+ namespace cv
+ {
+ namespace superres
+ {
+ CV_EXPORTS Mat arrGetMat(InputArray arr, Mat& buf);
+ CV_EXPORTS gpu::GpuMat arrGetGpuMat(InputArray arr, gpu::GpuMat& buf);
+
+ CV_EXPORTS void arrCopy(InputArray src, OutputArray dst);
+
+ CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1);
+ CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1);
+ }
+ }
+
+ #endif // __OPENCV_SUPERRES_INPUT_ARRAY_UTILITY_HPP__
--- /dev/null
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+
-using namespace std;
+ using namespace cv;
+ using namespace cv::gpu;
+ using namespace cv::superres;
+ using namespace cv::superres::detail;
+
+ ///////////////////////////////////////////////////////////////////
+ // CpuOpticalFlow
+
+ namespace
+ {
+ class CpuOpticalFlow : public DenseOpticalFlowExt
+ {
+ public:
+ explicit CpuOpticalFlow(int work_type);
+
+ void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
+ void collectGarbage();
+
+ protected:
+ virtual void impl(const Mat& input0, const Mat& input1, OutputArray dst) = 0;
+
+ private:
+ int work_type_;
+ Mat buf_[6];
+ Mat flow_;
+ Mat flows_[2];
+ };
+
+ CpuOpticalFlow::CpuOpticalFlow(int work_type) : work_type_(work_type)
+ {
+ }
+
+ void CpuOpticalFlow::calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2)
+ {
+ Mat frame0 = arrGetMat(_frame0, buf_[0]);
+ Mat frame1 = arrGetMat(_frame1, buf_[1]);
+
+ CV_Assert( frame1.type() == frame0.type() );
+ CV_Assert( frame1.size() == frame0.size() );
+
+ Mat input0 = convertToType(frame0, work_type_, buf_[2], buf_[3]);
+ Mat input1 = convertToType(frame1, work_type_, buf_[4], buf_[5]);
+
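+ // Fast path: a single packed flow written to a Mat-like destination needs
+ // neither the split nor the extra copy below.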
+ if (!_flow2.needed() && _flow1.kind() < _InputArray::OPENGL_BUFFER)
+ {
+ impl(input0, input1, _flow1);
+ return;
+ }
+
+ impl(input0, input1, flow_);
+
+ if (!_flow2.needed())
+ {
+ arrCopy(flow_, _flow1);
+ }
+ else
+ {
+ split(flow_, flows_);
+
+ arrCopy(flows_[0], _flow1);
+ arrCopy(flows_[1], _flow2);
+ }
+ }
+
+ void CpuOpticalFlow::collectGarbage()
+ {
+ for (int i = 0; i < 6; ++i)
+ buf_[i].release();
+ flow_.release();
+ flows_[0].release();
+ flows_[1].release();
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // Farneback
+
+ namespace
+ {
+ class Farneback : public CpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ Farneback();
+
+ protected:
+ void impl(const Mat& input0, const Mat& input1, OutputArray dst);
+
+ private:
+ double pyrScale_;
+ int numLevels_;
+ int winSize_;
+ int numIters_;
+ int polyN_;
+ double polySigma_;
+ int flags_;
+ };
+
+ CV_INIT_ALGORITHM(Farneback, "DenseOpticalFlowExt.Farneback",
+ obj.info()->addParam(obj, "pyrScale", obj.pyrScale_);
+ obj.info()->addParam(obj, "numLevels", obj.numLevels_);
+ obj.info()->addParam(obj, "winSize", obj.winSize_);
+ obj.info()->addParam(obj, "numIters", obj.numIters_);
+ obj.info()->addParam(obj, "polyN", obj.polyN_);
+ obj.info()->addParam(obj, "polySigma", obj.polySigma_);
+ obj.info()->addParam(obj, "flags", obj.flags_));
+
+ Farneback::Farneback() : CpuOpticalFlow(CV_8UC1)
+ {
+ pyrScale_ = 0.5;
+ numLevels_ = 5;
+ winSize_ = 13;
+ numIters_ = 10;
+ polyN_ = 5;
+ polySigma_ = 1.1;
+ flags_ = 0;
+ }
+
+ void Farneback::impl(const Mat& input0, const Mat& input1, OutputArray dst)
+ {
+ calcOpticalFlowFarneback(input0, input1, dst, pyrScale_, numLevels_, winSize_, numIters_, polyN_, polySigma_, flags_);
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback()
+ {
+ return new Farneback;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // Simple
+
+ namespace
+ {
+ class Simple : public CpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ Simple();
+
+ protected:
+ void impl(const Mat& input0, const Mat& input1, OutputArray dst);
+
+ private:
+ int layers_;
+ int averagingBlockSize_;
+ int maxFlow_;
+ double sigmaDist_;
+ double sigmaColor_;
+ int postProcessWindow_;
+ double sigmaDistFix_;
+ double sigmaColorFix_;
+ double occThr_;
+ int upscaleAveragingRadius_;
+ double upscaleSigmaDist_;
+ double upscaleSigmaColor_;
+ double speedUpThr_;
+ };
+
+ CV_INIT_ALGORITHM(Simple, "DenseOpticalFlowExt.Simple",
+ obj.info()->addParam(obj, "layers", obj.layers_);
+ obj.info()->addParam(obj, "averagingBlockSize", obj.averagingBlockSize_);
+ obj.info()->addParam(obj, "maxFlow", obj.maxFlow_);
+ obj.info()->addParam(obj, "sigmaDist", obj.sigmaDist_);
+ obj.info()->addParam(obj, "sigmaColor", obj.sigmaColor_);
+ obj.info()->addParam(obj, "postProcessWindow", obj.postProcessWindow_);
+ obj.info()->addParam(obj, "sigmaDistFix", obj.sigmaDistFix_);
+ obj.info()->addParam(obj, "sigmaColorFix", obj.sigmaColorFix_);
+ obj.info()->addParam(obj, "occThr", obj.occThr_);
+ obj.info()->addParam(obj, "upscaleAveragingRadius", obj.upscaleAveragingRadius_);
+ obj.info()->addParam(obj, "upscaleSigmaDist", obj.upscaleSigmaDist_);
+ obj.info()->addParam(obj, "upscaleSigmaColor", obj.upscaleSigmaColor_);
+ obj.info()->addParam(obj, "speedUpThr", obj.speedUpThr_));
+
+ Simple::Simple() : CpuOpticalFlow(CV_8UC3)
+ {
+ layers_ = 3;
+ averagingBlockSize_ = 2;
+ maxFlow_ = 4;
+ sigmaDist_ = 4.1;
+ sigmaColor_ = 25.5;
+ postProcessWindow_ = 18;
+ sigmaDistFix_ = 55.0;
+ sigmaColorFix_ = 25.5;
+ occThr_ = 0.35;
+ upscaleAveragingRadius_ = 18;
+ upscaleSigmaDist_ = 55.0;
+ upscaleSigmaColor_ = 25.5;
+ speedUpThr_ = 10;
+ }
+
+ void Simple::impl(const Mat& _input0, const Mat& _input1, OutputArray dst)
+ {
+ Mat input0 = _input0;
+ Mat input1 = _input1;
+ calcOpticalFlowSF(input0, input1, dst.getMatRef(),
+ layers_,
+ averagingBlockSize_,
+ maxFlow_,
+ sigmaDist_,
+ sigmaColor_,
+ postProcessWindow_,
+ sigmaDistFix_,
+ sigmaColorFix_,
+ occThr_,
+ upscaleAveragingRadius_,
+ upscaleSigmaDist_,
+ upscaleSigmaColor_,
+ speedUpThr_);
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Simple()
+ {
+ return new Simple;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // DualTVL1
+
+ namespace
+ {
+ class DualTVL1 : public CpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ DualTVL1();
+
+ void collectGarbage();
+
+ protected:
+ void impl(const Mat& input0, const Mat& input1, OutputArray dst);
+
+ private:
+ double tau_;
+ double lambda_;
+ double theta_;
+ int nscales_;
+ int warps_;
+ double epsilon_;
+ int iterations_;
+ bool useInitialFlow_;
+
+ Ptr<DenseOpticalFlow> alg_;
+ };
+
+ CV_INIT_ALGORITHM(DualTVL1, "DenseOpticalFlowExt.DualTVL1",
+ obj.info()->addParam(obj, "tau", obj.tau_);
+ obj.info()->addParam(obj, "lambda", obj.lambda_);
+ obj.info()->addParam(obj, "theta", obj.theta_);
+ obj.info()->addParam(obj, "nscales", obj.nscales_);
+ obj.info()->addParam(obj, "warps", obj.warps_);
+ obj.info()->addParam(obj, "epsilon", obj.epsilon_);
+ obj.info()->addParam(obj, "iterations", obj.iterations_);
+ obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_));
+
+ DualTVL1::DualTVL1() : CpuOpticalFlow(CV_8UC1)
+ {
+ alg_ = cv::createOptFlow_DualTVL1();
+ tau_ = alg_->getDouble("tau");
+ lambda_ = alg_->getDouble("lambda");
+ theta_ = alg_->getDouble("theta");
+ nscales_ = alg_->getInt("nscales");
+ warps_ = alg_->getInt("warps");
+ epsilon_ = alg_->getDouble("epsilon");
+ iterations_ = alg_->getInt("iterations");
+ useInitialFlow_ = alg_->getBool("useInitialFlow");
+ }
+
+ void DualTVL1::impl(const Mat& input0, const Mat& input1, OutputArray dst)
+ {
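+ // Push the externally exposed parameters back into the wrapped algorithm
+ // before each run, so set() calls on this object take effect.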
+ alg_->set("tau", tau_);
+ alg_->set("lambda", lambda_);
+ alg_->set("theta", theta_);
+ alg_->set("nscales", nscales_);
+ alg_->set("warps", warps_);
+ alg_->set("epsilon", epsilon_);
+ alg_->set("iterations", iterations_);
+ alg_->set("useInitialFlow", useInitialFlow_);
+
+ alg_->calc(input0, input1, dst);
+ }
+
+ void DualTVL1::collectGarbage()
+ {
+ alg_->collectGarbage();
+ CpuOpticalFlow::collectGarbage();
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1()
+ {
+ return new DualTVL1;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // GpuOpticalFlow
+
+ #ifndef HAVE_OPENCV_GPU
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_GPU()
+ {
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<DenseOpticalFlowExt>();
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_GPU()
+ {
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<DenseOpticalFlowExt>();
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Brox_GPU()
+ {
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<DenseOpticalFlowExt>();
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_GPU()
+ {
+ CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
+ return Ptr<DenseOpticalFlowExt>();
+ }
+
+ #else // HAVE_OPENCV_GPU
+
+ namespace
+ {
+ class GpuOpticalFlow : public DenseOpticalFlowExt
+ {
+ public:
+ explicit GpuOpticalFlow(int work_type);
+
+ void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
+ void collectGarbage();
+
+ protected:
+ virtual void impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2) = 0;
+
+ private:
+ int work_type_;
+ GpuMat buf_[6];
+ GpuMat u_, v_, flow_;
+ };
+
+ GpuOpticalFlow::GpuOpticalFlow(int work_type) : work_type_(work_type)
+ {
+ }
+
+ void GpuOpticalFlow::calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2)
+ {
+ GpuMat frame0 = arrGetGpuMat(_frame0, buf_[0]);
+ GpuMat frame1 = arrGetGpuMat(_frame1, buf_[1]);
+
+ CV_Assert( frame1.type() == frame0.type() );
+ CV_Assert( frame1.size() == frame0.size() );
+
+ GpuMat input0 = convertToType(frame0, work_type_, buf_[2], buf_[3]);
+ GpuMat input1 = convertToType(frame1, work_type_, buf_[4], buf_[5]);
+
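+ // Fast path: write u and v directly into the caller's GpuMats.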
+ if (_flow2.needed() && _flow1.kind() == _InputArray::GPU_MAT && _flow2.kind() == _InputArray::GPU_MAT)
+ {
+ impl(input0, input1, _flow1.getGpuMatRef(), _flow2.getGpuMatRef());
+ return;
+ }
+
+ impl(input0, input1, u_, v_);
+
+ if (_flow2.needed())
+ {
+ arrCopy(u_, _flow1);
+ arrCopy(v_, _flow2);
+ }
+ else
+ {
+ GpuMat src[] = {u_, v_};
+ merge(src, 2, flow_);
+ arrCopy(flow_, _flow1);
+ }
+ }
+
+ void GpuOpticalFlow::collectGarbage()
+ {
+ for (int i = 0; i < 6; ++i)
+ buf_[i].release();
+ u_.release();
+ v_.release();
+ flow_.release();
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // Brox_GPU
+
+ namespace
+ {
+ class Brox_GPU : public GpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ Brox_GPU();
+
+ void collectGarbage();
+
+ protected:
+ void impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2);
+
+ private:
+ double alpha_;
+ double gamma_;
+ double scaleFactor_;
+ int innerIterations_;
+ int outerIterations_;
+ int solverIterations_;
+
+ BroxOpticalFlow alg_;
+ };
+
+ CV_INIT_ALGORITHM(Brox_GPU, "DenseOpticalFlowExt.Brox_GPU",
+ obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Flow smoothness");
+ obj.info()->addParam(obj, "gamma", obj.gamma_, false, 0, 0, "Gradient constancy importance");
+ obj.info()->addParam(obj, "scaleFactor", obj.scaleFactor_, false, 0, 0, "Pyramid scale factor");
+ obj.info()->addParam(obj, "innerIterations", obj.innerIterations_, false, 0, 0, "Number of lagged non-linearity iterations (inner loop)");
+ obj.info()->addParam(obj, "outerIterations", obj.outerIterations_, false, 0, 0, "Number of warping iterations (number of pyramid levels)");
+ obj.info()->addParam(obj, "solverIterations", obj.solverIterations_, false, 0, 0, "Number of linear system solver iterations"));
+
+ Brox_GPU::Brox_GPU() : GpuOpticalFlow(CV_32FC1), alg_(0.197f, 50.0f, 0.8f, 10, 77, 10)
+ {
+ alpha_ = alg_.alpha;
+ gamma_ = alg_.gamma;
+ scaleFactor_ = alg_.scale_factor;
+ innerIterations_ = alg_.inner_iterations;
+ outerIterations_ = alg_.outer_iterations;
+ solverIterations_ = alg_.solver_iterations;
+ }
+
+ void Brox_GPU::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
+ {
+ alg_.alpha = static_cast<float>(alpha_);
+ alg_.gamma = static_cast<float>(gamma_);
+ alg_.scale_factor = static_cast<float>(scaleFactor_);
+ alg_.inner_iterations = innerIterations_;
+ alg_.outer_iterations = outerIterations_;
+ alg_.solver_iterations = solverIterations_;
+
+ alg_(input0, input1, dst1, dst2);
+ }
+
+ void Brox_GPU::collectGarbage()
+ {
+ alg_.buf.release();
+ GpuOpticalFlow::collectGarbage();
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Brox_GPU()
+ {
+ return new Brox_GPU;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // PyrLK_GPU
+
+ namespace
+ {
+ class PyrLK_GPU : public GpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ PyrLK_GPU();
+
+ void collectGarbage();
+
+ protected:
+ void impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2);
+
+ private:
+ int winSize_;
+ int maxLevel_;
+ int iterations_;
+
+ PyrLKOpticalFlow alg_;
+ };
+
+ CV_INIT_ALGORITHM(PyrLK_GPU, "DenseOpticalFlowExt.PyrLK_GPU",
+ obj.info()->addParam(obj, "winSize", obj.winSize_);
+ obj.info()->addParam(obj, "maxLevel", obj.maxLevel_);
+ obj.info()->addParam(obj, "iterations", obj.iterations_));
+
+ PyrLK_GPU::PyrLK_GPU() : GpuOpticalFlow(CV_8UC1)
+ {
+ winSize_ = alg_.winSize.width;
+ maxLevel_ = alg_.maxLevel;
+ iterations_ = alg_.iters;
+ }
+
+ void PyrLK_GPU::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
+ {
+ alg_.winSize.width = winSize_;
+ alg_.winSize.height = winSize_;
+ alg_.maxLevel = maxLevel_;
+ alg_.iters = iterations_;
+
+ alg_.dense(input0, input1, dst1, dst2);
+ }
+
+ void PyrLK_GPU::collectGarbage()
+ {
+ alg_.releaseMemory();
+ GpuOpticalFlow::collectGarbage();
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_GPU()
+ {
+ return new PyrLK_GPU;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // Farneback_GPU
+
+ namespace
+ {
+ class Farneback_GPU : public GpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ Farneback_GPU();
+
+ void collectGarbage();
+
+ protected:
+ void impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2);
+
+ private:
+ double pyrScale_;
+ int numLevels_;
+ int winSize_;
+ int numIters_;
+ int polyN_;
+ double polySigma_;
+ int flags_;
+
+ FarnebackOpticalFlow alg_;
+ };
+
+ CV_INIT_ALGORITHM(Farneback_GPU, "DenseOpticalFlowExt.Farneback_GPU",
+ obj.info()->addParam(obj, "pyrScale", obj.pyrScale_);
+ obj.info()->addParam(obj, "numLevels", obj.numLevels_);
+ obj.info()->addParam(obj, "winSize", obj.winSize_);
+ obj.info()->addParam(obj, "numIters", obj.numIters_);
+ obj.info()->addParam(obj, "polyN", obj.polyN_);
+ obj.info()->addParam(obj, "polySigma", obj.polySigma_);
+ obj.info()->addParam(obj, "flags", obj.flags_));
+
+ Farneback_GPU::Farneback_GPU() : GpuOpticalFlow(CV_8UC1)
+ {
+ pyrScale_ = alg_.pyrScale;
+ numLevels_ = alg_.numLevels;
+ winSize_ = alg_.winSize;
+ numIters_ = alg_.numIters;
+ polyN_ = alg_.polyN;
+ polySigma_ = alg_.polySigma;
+ flags_ = alg_.flags;
+ }
+
+ void Farneback_GPU::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
+ {
+ alg_.pyrScale = pyrScale_;
+ alg_.numLevels = numLevels_;
+ alg_.winSize = winSize_;
+ alg_.numIters = numIters_;
+ alg_.polyN = polyN_;
+ alg_.polySigma = polySigma_;
+ alg_.flags = flags_;
+
+ alg_(input0, input1, dst1, dst2);
+ }
+
+ void Farneback_GPU::collectGarbage()
+ {
+ alg_.releaseMemory();
+ GpuOpticalFlow::collectGarbage();
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_GPU()
+ {
+ return new Farneback_GPU;
+ }
+
+ ///////////////////////////////////////////////////////////////////
+ // DualTVL1_GPU
+
+ namespace
+ {
+ class DualTVL1_GPU : public GpuOpticalFlow
+ {
+ public:
+ AlgorithmInfo* info() const;
+
+ DualTVL1_GPU();
+
+ void collectGarbage();
+
+ protected:
+ void impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2);
+
+ private:
+ double tau_;
+ double lambda_;
+ double theta_;
+ int nscales_;
+ int warps_;
+ double epsilon_;
+ int iterations_;
+ bool useInitialFlow_;
+
+ OpticalFlowDual_TVL1_GPU alg_;
+ };
+
+ CV_INIT_ALGORITHM(DualTVL1_GPU, "DenseOpticalFlowExt.DualTVL1_GPU",
+ obj.info()->addParam(obj, "tau", obj.tau_);
+ obj.info()->addParam(obj, "lambda", obj.lambda_);
+ obj.info()->addParam(obj, "theta", obj.theta_);
+ obj.info()->addParam(obj, "nscales", obj.nscales_);
+ obj.info()->addParam(obj, "warps", obj.warps_);
+ obj.info()->addParam(obj, "epsilon", obj.epsilon_);
+ obj.info()->addParam(obj, "iterations", obj.iterations_);
+ obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_));
+
+ DualTVL1_GPU::DualTVL1_GPU() : GpuOpticalFlow(CV_8UC1)
+ {
+ tau_ = alg_.tau;
+ lambda_ = alg_.lambda;
+ theta_ = alg_.theta;
+ nscales_ = alg_.nscales;
+ warps_ = alg_.warps;
+ epsilon_ = alg_.epsilon;
+ iterations_ = alg_.iterations;
+ useInitialFlow_ = alg_.useInitialFlow;
+ }
+
+ void DualTVL1_GPU::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2)
+ {
+ alg_.tau = tau_;
+ alg_.lambda = lambda_;
+ alg_.theta = theta_;
+ alg_.nscales = nscales_;
+ alg_.warps = warps_;
+ alg_.epsilon = epsilon_;
+ alg_.iterations = iterations_;
+ alg_.useInitialFlow = useInitialFlow_;
+
+ alg_(input0, input1, dst1, dst2);
+ }
+
+ void DualTVL1_GPU::collectGarbage()
+ {
+ alg_.collectGarbage();
+ GpuOpticalFlow::collectGarbage();
+ }
+ }
+
+ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_GPU()
+ {
+ return new DualTVL1_GPU;
+ }
+
+ #endif // HAVE_OPENCV_GPU
--- /dev/null
-#include "opencv2/core/core.hpp"
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #ifndef __OPENCV_PRECOMP_H__
+ #define __OPENCV_PRECOMP_H__
+
+ #include <vector>
+ #include <limits>
+
+ #ifdef HAVE_CVCONFIG_H
+ #include "cvconfig.h"
+ #endif
+
+ #include "opencv2/opencv_modules.hpp"
-#include "opencv2/core/opengl_interop.hpp"
++#include "opencv2/core.hpp"
+ #include "opencv2/core/gpumat.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
++#include "opencv2/core/opengl.hpp"
++#include "opencv2/core/utility.hpp"
+ #include "opencv2/core/internal.hpp"
- #include "opencv2/gpu/gpu.hpp"
++#include "opencv2/imgproc.hpp"
+ #include "opencv2/video/tracking.hpp"
+
+ #ifdef HAVE_OPENCV_GPU
- #include "opencv2/highgui/highgui.hpp"
++ #include "opencv2/gpu.hpp"
+ #ifdef HAVE_CUDA
+ #include "opencv2/gpu/stream_accessor.hpp"
+ #endif
+ #endif
+
+ #ifdef HAVE_OPENCV_HIGHGUI
-#include "opencv2/superres/superres.hpp"
++ #include "opencv2/highgui.hpp"
+ #endif
+
++#include "opencv2/superres.hpp"
+ #include "opencv2/superres/optical_flow.hpp"
+ #include "input_array_utility.hpp"
+
+ #include "ring_buffer.hpp"
+
+ #endif /* __OPENCV_PRECOMP_H__ */
--- /dev/null
-using namespace std;
+ /*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ // By downloading, copying, installing or using the software you agree to this license.
+ // If you do not agree to this license, do not download, install,
+ // copy or use the software.
+ //
+ //
+ // License Agreement
+ // For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ // * Redistribution's of source code must retain the above copyright notice,
+ // this list of conditions and the following disclaimer.
+ //
+ // * Redistribution's in binary form must reproduce the above copyright notice,
+ // this list of conditions and the following disclaimer in the documentation
+ // and/or other materials provided with the distribution.
+ //
+ // * The name of the copyright holders may not be used to endorse or promote products
+ // derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+ #include "precomp.hpp"
+
+ using namespace cv;
+ using namespace cv::superres;
+
+ bool cv::superres::initModule_superres()
+ {
+ return !createSuperResolution_BTVL1().empty();
+ }
+
+ cv::superres::SuperResolution::SuperResolution()
+ {
+ frameSource_ = createFrameSource_Empty();
+ firstCall_ = true;
+ }
+
+ void cv::superres::SuperResolution::setInput(const Ptr<FrameSource>& frameSource)
+ {
+ frameSource_ = frameSource;
+ firstCall_ = true;
+ }
+
+ void cv::superres::SuperResolution::nextFrame(OutputArray frame)
+ {
+ if (firstCall_)
+ {
+ initImpl(frameSource_);
+ firstCall_ = false;
+ }
+
+ processImpl(frameSource_, frame);
+ }
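+
+ // Deferred initialization: initImpl() runs on the first nextFrame() call,
+ // and setInput()/reset() re-arm firstCall_. A typical consumer loop
+ // (illustrative sketch; `source` is any FrameSource):
+ //
+ //     superRes->setInput(source);
+ //     for (;;)
+ //     {
+ //         Mat frame;
+ //         superRes->nextFrame(frame);
+ //         if (frame.empty())
+ //             break;
+ //         // ... use frame ...
+ //     }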
+
+ void cv::superres::SuperResolution::reset()
+ {
+ frameSource_->reset();
+ firstCall_ = true;
+ }
+
+ void cv::superres::SuperResolution::collectGarbage()
+ {
+ }
--- /dev/null
-#include "opencv2/core/core.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/superres/superres.hpp"
+ #ifdef __GNUC__
+ # pragma GCC diagnostic ignored "-Wmissing-declarations"
+ # if defined __clang__ || defined __APPLE__
+ # pragma GCC diagnostic ignored "-Wmissing-prototypes"
+ # pragma GCC diagnostic ignored "-Wextra"
+ # endif
+ #endif
+
+ #ifndef __OPENCV_TEST_PRECOMP_HPP__
+ #define __OPENCV_TEST_PRECOMP_HPP__
+
+ #ifdef HAVE_CVCONFIG_H
+ #include "cvconfig.h"
+ #endif
+
+ #include "opencv2/opencv_modules.hpp"
++#include "opencv2/core.hpp"
++#include "opencv2/imgproc.hpp"
++#include "opencv2/ts.hpp"
++#include "opencv2/superres.hpp"
+ #include "input_array_utility.hpp"
+
+ #endif
--- /dev/null
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
+ #ifndef __OPENCV_GPU_PERF_UTILITY_HPP__
+ #define __OPENCV_GPU_PERF_UTILITY_HPP__
+
++#include "opencv2/core.hpp"
++#include "opencv2/highgui.hpp"
++#include "opencv2/imgproc.hpp"
+ #include "opencv2/ts/ts_perf.hpp"
+
+ namespace perf
+ {
+ CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+ #define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
+
+ CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
+ #define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
+
+ CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING, cv::NORM_MINMAX)
+
+ enum { Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4 };
+ CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
+ #define GPU_CHANNELS_1_3_4 testing::Values(MatCn(Gray), MatCn(BGR), MatCn(BGRA))
+ #define GPU_CHANNELS_1_3 testing::Values(MatCn(Gray), MatCn(BGR))
+
+ #define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+ #define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
+ #define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
+
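+ // DEF_PARAM_TEST builds a perf::TestBaseWithParam fixture over a tr1::tuple
+ // of the listed types, and GET_PARAM(k) extracts the k-th element inside the
+ // test body. Illustrative sketch (MyKernel is a made-up test name):
+ //
+ //     PERF_TEST_P(Sz_Depth, MyKernel,
+ //                 testing::Combine(GPU_TYPICAL_MAT_SIZES,
+ //                                  testing::Values(MatDepth(CV_8U))))
+ //     {
+ //         const cv::Size size = GET_PARAM(0);
+ //         const int depth = GET_PARAM(1);
+ //         // ... build inputs of size/depth and run the kernel ...
+ //     }
+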
+ DEF_PARAM_TEST_1(Sz, cv::Size);
+ typedef perf::Size_MatType Sz_Type;
+ DEF_PARAM_TEST(Sz_Depth, cv::Size, perf::MatDepth);
+ DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, perf::MatDepth, MatCn);
+
+ #define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
+
+ #define FAIL_NO_CPU() FAIL() << "No CPU implementation analog"
+
+ #define GPU_SANITY_CHECK(mat, ...) \
+ do{ \
+ cv::Mat gpu_##mat(mat); \
+ SANITY_CHECK(gpu_##mat, ## __VA_ARGS__); \
+ } while(0)
+
+ #define CPU_SANITY_CHECK(mat, ...) \
+ do{ \
+ cv::Mat cpu_##mat(mat); \
+ SANITY_CHECK(cpu_##mat, ## __VA_ARGS__); \
+ } while(0)
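+
+ // Constructing cv::Mat from the GPU result downloads it to the host; the
+ // gpu_/cpu_ name prefixes keep the stored regression baselines for GPU and
+ // CPU runs distinct.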
+
+ CV_EXPORTS cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
+
+ struct CvtColorInfo
+ {
+ int scn;
+ int dcn;
+ int code;
+
+ CvtColorInfo() {}
+ explicit CvtColorInfo(int scn_, int dcn_, int code_) : scn(scn_), dcn(dcn_), code(code_) {}
+ };
+ CV_EXPORTS void PrintTo(const CvtColorInfo& info, std::ostream* os);
+
+ CV_EXPORTS void printCudaInfo();
+
+ CV_EXPORTS void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray());
+ }
+
+ #endif // __OPENCV_GPU_PERF_UTILITY_HPP__
--- /dev/null
-#include "opencv2/core/core.hpp"
+ #ifndef __OPENCV_GPU_TEST_UTILITY_HPP__
+ #define __OPENCV_GPU_TEST_UTILITY_HPP__
+
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/ts/ts.hpp"
-#include "opencv2/ts/ts_perf.hpp"
++#include "opencv2/core.hpp"
+ #include "opencv2/core/gpumat.hpp"
++#include "opencv2/highgui.hpp"
++#include "opencv2/imgproc.hpp"
++#include "opencv2/ts.hpp"
+
+ namespace cvtest
+ {
+ //////////////////////////////////////////////////////////////////////
+ // random generators
+
+ CV_EXPORTS int randomInt(int minVal, int maxVal);
+ CV_EXPORTS double randomDouble(double minVal, double maxVal);
+ CV_EXPORTS cv::Size randomSize(int minVal, int maxVal);
+ CV_EXPORTS cv::Scalar randomScalar(double minVal, double maxVal);
+ CV_EXPORTS cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
+
+ //////////////////////////////////////////////////////////////////////
+ // GpuMat create
+
+ CV_EXPORTS cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
+ CV_EXPORTS cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
+
+ //////////////////////////////////////////////////////////////////////
+ // Image load
+
+ //! read image from testdata folder
+ CV_EXPORTS cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
+
+ //! read image from testdata folder and convert it to specified type
+ CV_EXPORTS cv::Mat readImageType(const std::string& fname, int type);
+
+ //////////////////////////////////////////////////////////////////////
+ // Gpu devices
+
+ //! return true if the device supports the specified feature and the gpu module was built with support for that feature.
+ CV_EXPORTS bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+
+ class CV_EXPORTS DeviceManager
+ {
+ public:
+ static DeviceManager& instance();
+
+ void load(int i);
+ void loadAll();
+
+ const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
+
+ private:
+ std::vector<cv::gpu::DeviceInfo> devices_;
+ };
+
+ #define ALL_DEVICES testing::ValuesIn(cvtest::DeviceManager::instance().values())
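+ // DeviceManager holds the CUDA devices selected for the run (via load() /
+ // loadAll()), and ALL_DEVICES feeds them to ValuesIn so each
+ // value-parameterized test is instantiated once per device.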
+
+ //////////////////////////////////////////////////////////////////////
+ // Additional assertion
+
+ CV_EXPORTS void minMaxLocGold(const cv::Mat& src, double* minVal_, double* maxVal_ = 0, cv::Point* minLoc_ = 0, cv::Point* maxLoc_ = 0, const cv::Mat& mask = cv::Mat());
+
+ CV_EXPORTS cv::Mat getMat(cv::InputArray arr);
+
+ CV_EXPORTS testing::AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, cv::InputArray m1, cv::InputArray m2, double eps);
+
+ #define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps)
+ #define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(cvtest::assertMatNear, m1, m2, eps)
+
+ #define EXPECT_SCALAR_NEAR(s1, s2, eps) \
+ { \
+ EXPECT_NEAR(s1[0], s2[0], eps); \
+ EXPECT_NEAR(s1[1], s2[1], eps); \
+ EXPECT_NEAR(s1[2], s2[2], eps); \
+ EXPECT_NEAR(s1[3], s2[3], eps); \
+ }
+ #define ASSERT_SCALAR_NEAR(s1, s2, eps) \
+ { \
+ ASSERT_NEAR(s1[0], s2[0], eps); \
+ ASSERT_NEAR(s1[1], s2[1], eps); \
+ ASSERT_NEAR(s1[2], s2[2], eps); \
+ ASSERT_NEAR(s1[3], s2[3], eps); \
+ }
+
+ #define EXPECT_POINT2_NEAR(p1, p2, eps) \
+ { \
+ EXPECT_NEAR(p1.x, p2.x, eps); \
+ EXPECT_NEAR(p1.y, p2.y, eps); \
+ }
+ #define ASSERT_POINT2_NEAR(p1, p2, eps) \
+ { \
+ ASSERT_NEAR(p1.x, p2.x, eps); \
+ ASSERT_NEAR(p1.y, p2.y, eps); \
+ }
+
+ #define EXPECT_POINT3_NEAR(p1, p2, eps) \
+ { \
+ EXPECT_NEAR(p1.x, p2.x, eps); \
+ EXPECT_NEAR(p1.y, p2.y, eps); \
+ EXPECT_NEAR(p1.z, p2.z, eps); \
+ }
+ #define ASSERT_POINT3_NEAR(p1, p2, eps) \
+ { \
+ ASSERT_NEAR(p1.x, p2.x, eps); \
+ ASSERT_NEAR(p1.y, p2.y, eps); \
+ ASSERT_NEAR(p1.z, p2.z, eps); \
+ }
+
+ CV_EXPORTS double checkSimilarity(cv::InputArray m1, cv::InputArray m2);
+
+ #define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
+ { \
+ ASSERT_EQ(mat1.type(), mat2.type()); \
+ ASSERT_EQ(mat1.size(), mat2.size()); \
+ EXPECT_LE(checkSimilarity(mat1, mat2), eps); \
+ }
+ #define ASSERT_MAT_SIMILAR(mat1, mat2, eps) \
+ { \
+ ASSERT_EQ(mat1.type(), mat2.type()); \
+ ASSERT_EQ(mat1.size(), mat2.size()); \
+ ASSERT_LE(checkSimilarity(mat1, mat2), eps); \
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Helper structs for value-parameterized tests
+
+ #define GPU_TEST_P(test_case_name, test_name) \
+ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
+ : public test_case_name { \
+ public: \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
+ virtual void TestBody(); \
+ private: \
+ void UnsafeTestBody(); \
+ static int AddToRegistry() { \
+ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
+ GetTestCasePatternHolder<test_case_name>(\
+ #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
+ #test_case_name, \
+ #test_name, \
+ new ::testing::internal::TestMetaFactory< \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
+ return 0; \
+ } \
+ static int gtest_registering_dummy_; \
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(\
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
+ }; \
+ int GTEST_TEST_CLASS_NAME_(test_case_name, \
+ test_name)::gtest_registering_dummy_ = \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
+ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() \
+ { \
+ try \
+ { \
+ UnsafeTestBody(); \
+ } \
+ catch (...) \
+ { \
+ cv::gpu::resetDevice(); \
+ throw; \
+ } \
+ } \
+ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::UnsafeTestBody()
+
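+ // GPU_TEST_P mirrors gtest's TEST_P registration but wraps the user body
+ // (UnsafeTestBody) in a try/catch that resets the CUDA device before
+ // rethrowing, so one failed kernel launch cannot poison the context for the
+ // tests that follow. Used like TEST_P (illustrative sketch, made-up names):
+ //
+ //     PARAM_TEST_CASE(Add, cv::gpu::DeviceInfo, cv::Size) {};
+ //     GPU_TEST_P(Add, Accuracy)
+ //     {
+ //         cv::gpu::setDevice(GET_PARAM(0).deviceID());
+ //         // ... upload inputs, run, EXPECT_MAT_NEAR(...) ...
+ //     }
+ //     INSTANTIATE_TEST_CASE_P(GpuArithm, Add,
+ //                             testing::Combine(ALL_DEVICES, DIFFERENT_SIZES));
+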
+ #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+ #define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+ #define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
+
+ // Depth
+
+ using perf::MatDepth;
+
+ #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
+
+ #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \
+ std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)), \
+ std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)), \
+ std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)), \
+ std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)), \
+ std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)), \
+ \
+ std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
+ std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
+ std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
+ std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
+ \
+ std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
+ std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
+ std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
+ std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
+ \
+ std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
+ std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
+ std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
+ \
+ std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
+ std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
+ \
+ std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
+
+ // Type
+
+ using perf::MatType;
+
+ //! return vector with types from specified range.
+ CV_EXPORTS std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
+
+ //! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
+ CV_EXPORTS const std::vector<MatType>& all_types();
+
+ #define ALL_TYPES testing::ValuesIn(all_types())
+ #define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
+
+ // ROI
+
+ class UseRoi
+ {
+ public:
+ inline UseRoi(bool val = false) : val_(val) {}
+
+ inline operator bool() const { return val_; }
+
+ private:
+ bool val_;
+ };
+
+ CV_EXPORTS void PrintTo(const UseRoi& useRoi, std::ostream* os);
+
+ #define WHOLE_SUBMAT testing::Values(UseRoi(false), UseRoi(true))
+
+ // Direct/Inverse
+
+ class Inverse
+ {
+ public:
+ inline Inverse(bool val = false) : val_(val) {}
+
+ inline operator bool() const { return val_; }
+
+ private:
+ bool val_;
+ };
+
+ CV_EXPORTS void PrintTo(const Inverse& inverse, std::ostream* os);
+
+ #define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
+
+ // Param class
+
+ #define IMPLEMENT_PARAM_CLASS(name, type) \
+ class name \
+ { \
+ public: \
+ name ( type arg = type ()) : val_(arg) {} \
+ operator type () const {return val_;} \
+ private: \
+ type val_; \
+ }; \
+ inline void PrintTo( name param, std::ostream* os) \
+ { \
+ *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
+ }
+
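+ // IMPLEMENT_PARAM_CLASS wraps a raw type in a named class so gtest's
+ // parameter printout reads e.g. "Channels(3)" instead of a bare "3".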
+ IMPLEMENT_PARAM_CLASS(Channels, int)
+
+ #define ALL_CHANNELS testing::Values(Channels(1), Channels(2), Channels(3), Channels(4))
+ #define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4))
+
+ // Flags and enums
+
+ CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
+
+ CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
+
+ CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+ #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))
+
+ CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
+
+ //////////////////////////////////////////////////////////////////////
+ // Features2D
+
+ CV_EXPORTS testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
+
+ #define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual)
+
+ CV_EXPORTS int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
+ CV_EXPORTS int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches);
+
+ //////////////////////////////////////////////////////////////////////
+ // Other
+
+ CV_EXPORTS void dumpImage(const std::string& fileName, const cv::Mat& image);
+ CV_EXPORTS void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
+
+ CV_EXPORTS void printCudaInfo();
+ }
+
+ namespace cv { namespace gpu
+ {
+ CV_EXPORTS void PrintTo(const DeviceInfo& info, std::ostream* os);
+ }}
+
+ #endif // __OPENCV_GPU_TEST_UTILITY_HPP__
--- /dev/null
+ #include "opencv2/ts/gpu_perf.hpp"
+ #include "opencv2/core/gpumat.hpp"
+
+ #include "cvconfig.h"
+
+ #ifdef HAVE_CUDA
+ #include <cuda_runtime.h>
+ #endif
+
+ using namespace cv;
++using namespace std;
+
+ namespace perf
+ {
+ Mat readImage(const string& fileName, int flags)
+ {
+ return imread(perf::TestBase::getDataPath(fileName), flags);
+ }
+
+ void PrintTo(const CvtColorInfo& info, std::ostream* os)
+ {
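+ // The table below is indexed by the raw cv::COLOR_* conversion code, so
+ // entry order must mirror the enum; the "" entries are placeholders for
+ // codes these tests never print.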
+ static const char* str[] =
+ {
+ "BGR2BGRA",
+ "BGRA2BGR",
+ "BGR2RGBA",
+ "RGBA2BGR",
+ "BGR2RGB",
+ "BGRA2RGBA",
+
+ "BGR2GRAY",
+ "RGB2GRAY",
+ "GRAY2BGR",
+ "GRAY2BGRA",
+ "BGRA2GRAY",
+ "RGBA2GRAY",
+
+ "BGR2BGR565",
+ "RGB2BGR565",
+ "BGR5652BGR",
+ "BGR5652RGB",
+ "BGRA2BGR565",
+ "RGBA2BGR565",
+ "BGR5652BGRA",
+ "BGR5652RGBA",
+
+ "GRAY2BGR565",
+ "BGR5652GRAY",
+
+ "BGR2BGR555",
+ "RGB2BGR555",
+ "BGR5552BGR",
+ "BGR5552RGB",
+ "BGRA2BGR555",
+ "RGBA2BGR555",
+ "BGR5552BGRA",
+ "BGR5552RGBA",
+
+ "GRAY2BGR555",
+ "BGR5552GRAY",
+
+ "BGR2XYZ",
+ "RGB2XYZ",
+ "XYZ2BGR",
+ "XYZ2RGB",
+
+ "BGR2YCrCb",
+ "RGB2YCrCb",
+ "YCrCb2BGR",
+ "YCrCb2RGB",
+
+ "BGR2HSV",
+ "RGB2HSV",
+
+ "",
+ "",
+
+ "BGR2Lab",
+ "RGB2Lab",
+
+ "BayerBG2BGR",
+ "BayerGB2BGR",
+ "BayerRG2BGR",
+ "BayerGR2BGR",
+
+ "BGR2Luv",
+ "RGB2Luv",
+
+ "BGR2HLS",
+ "RGB2HLS",
+
+ "HSV2BGR",
+ "HSV2RGB",
+
+ "Lab2BGR",
+ "Lab2RGB",
+ "Luv2BGR",
+ "Luv2RGB",
+
+ "HLS2BGR",
+ "HLS2RGB",
+
+ "BayerBG2BGR_VNG",
+ "BayerGB2BGR_VNG",
+ "BayerRG2BGR_VNG",
+ "BayerGR2BGR_VNG",
+
+ "BGR2HSV_FULL",
+ "RGB2HSV_FULL",
+ "BGR2HLS_FULL",
+ "RGB2HLS_FULL",
+
+ "HSV2BGR_FULL",
+ "HSV2RGB_FULL",
+ "HLS2BGR_FULL",
+ "HLS2RGB_FULL",
+
+ "LBGR2Lab",
+ "LRGB2Lab",
+ "LBGR2Luv",
+ "LRGB2Luv",
+
+ "Lab2LBGR",
+ "Lab2LRGB",
+ "Luv2LBGR",
+ "Luv2LRGB",
+
+ "BGR2YUV",
+ "RGB2YUV",
+ "YUV2BGR",
+ "YUV2RGB",
+
+ "BayerBG2GRAY",
+ "BayerGB2GRAY",
+ "BayerRG2GRAY",
+ "BayerGR2GRAY",
+
+ //YUV 4:2:0 formats family
+ "YUV2RGB_NV12",
+ "YUV2BGR_NV12",
+ "YUV2RGB_NV21",
+ "YUV2BGR_NV21",
+
+ "YUV2RGBA_NV12",
+ "YUV2BGRA_NV12",
+ "YUV2RGBA_NV21",
+ "YUV2BGRA_NV21",
+
+ "YUV2RGB_YV12",
+ "YUV2BGR_YV12",
+ "YUV2RGB_IYUV",
+ "YUV2BGR_IYUV",
+
+ "YUV2RGBA_YV12",
+ "YUV2BGRA_YV12",
+ "YUV2RGBA_IYUV",
+ "YUV2BGRA_IYUV",
+
+ "YUV2GRAY_420",
+
+ //YUV 4:2:2 formats family
+ "YUV2RGB_UYVY",
+ "YUV2BGR_UYVY",
+ "YUV2RGB_VYUY",
+ "YUV2BGR_VYUY",
+
+ "YUV2RGBA_UYVY",
+ "YUV2BGRA_UYVY",
+ "YUV2RGBA_VYUY",
+ "YUV2BGRA_VYUY",
+
+ "YUV2RGB_YUY2",
+ "YUV2BGR_YUY2",
+ "YUV2RGB_YVYU",
+ "YUV2BGR_YVYU",
+
+ "YUV2RGBA_YUY2",
+ "YUV2BGRA_YUY2",
+ "YUV2RGBA_YVYU",
+ "YUV2BGRA_YVYU",
+
+ "YUV2GRAY_UYVY",
+ "YUV2GRAY_YUY2",
+
+ // alpha premultiplication
+ "RGBA2mRGBA",
+ "mRGBA2RGBA",
+
+ "COLORCVT_MAX"
+ };
+
+ *os << str[info.code];
+ }
+
+ static void printOsInfo()
+ {
+ #if defined _WIN32
+ # if defined _WIN64
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"), fflush(stdout);
+ # else
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"), fflush(stdout);
+ # endif
+ #elif defined linux
+ # if defined _LP64
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"), fflush(stdout);
+ # else
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"), fflush(stdout);
+ # endif
+ #elif defined __APPLE__
+ # if defined _LP64
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"), fflush(stdout);
+ # else
+ printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"), fflush(stdout);
+ # endif
+ #endif
+
+ }
+
+ void printCudaInfo()
+ {
+ printOsInfo();
+ #ifndef HAVE_CUDA
+ printf("[----------]\n[ GPU INFO ] \tOpenCV was built without CUDA support.\n[----------]\n"), fflush(stdout);
+ #else
+ int driver;
+ cudaDriverGetVersion(&driver);
+
+ printf("[----------]\n"), fflush(stdout);
+ printf("[ GPU INFO ] \tCUDA Driver version: %d.\n", driver), fflush(stdout);
+ printf("[ GPU INFO ] \tCUDA Runtime version: %d.\n", CUDART_VERSION), fflush(stdout);
+ printf("[----------]\n"), fflush(stdout);
+
+ printf("[----------]\n"), fflush(stdout);
+ printf("[ GPU INFO ] \tGPU module was compiled for the following GPU archs.\n"), fflush(stdout);
+ printf("[ BIN ] \t%s.\n", CUDA_ARCH_BIN), fflush(stdout);
+ printf("[ PTX ] \t%s.\n", CUDA_ARCH_PTX), fflush(stdout);
+ printf("[----------]\n"), fflush(stdout);
+
+ printf("[----------]\n"), fflush(stdout);
+ int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+ printf("[ GPU INFO ] \tCUDA device count: %d.\n", deviceCount), fflush(stdout);
+ printf("[----------]\n"), fflush(stdout);
+
+ for (int i = 0; i < deviceCount; ++i)
+ {
+ cv::gpu::DeviceInfo info(i);
+
+ printf("[----------]\n"), fflush(stdout);
+ printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
+ printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
+ printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
+ printf("[ ] \tTotal memory: %d MB\n", static_cast<int>(info.totalMemory() / (1024 * 1024))), fflush(stdout);
+ printf("[ ] \tFree memory: %d MB\n", static_cast<int>(info.freeMemory() / (1024 * 1024))), fflush(stdout);
+ if (!info.isCompatible())
+ printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
+ printf("[----------]\n"), fflush(stdout);
+ }
+
+ #endif
+ }
+
+ struct KeypointIdxCompare
+ {
+ std::vector<cv::KeyPoint>* keypoints;
+
+ explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
+
+ bool operator ()(size_t i1, size_t i2) const
+ {
+ cv::KeyPoint kp1 = (*keypoints)[i1];
+ cv::KeyPoint kp2 = (*keypoints)[i2];
+ if (kp1.pt.x != kp2.pt.x)
+ return kp1.pt.x < kp2.pt.x;
+ if (kp1.pt.y != kp2.pt.y)
+ return kp1.pt.y < kp2.pt.y;
+ if (kp1.response != kp2.response)
+ return kp1.response < kp2.response;
+ return kp1.octave < kp2.octave;
+ }
+ };
+
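+ // Sort an index array instead of the keypoints themselves so any matching
+ // descriptor rows can be permuted in lockstep, giving CPU and GPU detector
+ // output a deterministic order before comparison.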
+ void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors)
+ {
+ std::vector<size_t> indices(keypoints.size());
+ for (size_t i = 0; i < indices.size(); ++i)
+     indices[i] = i;
+
+ std::sort(indices.begin(), indices.end(), KeypointIdxCompare(&keypoints));
+
+ std::vector<cv::KeyPoint> new_keypoints;
+ cv::Mat new_descriptors;
+
+ new_keypoints.resize(keypoints.size());
+
+ cv::Mat descriptors;
+ if (_descriptors.needed())
+ {
+ descriptors = _descriptors.getMat();
+ new_descriptors.create(descriptors.size(), descriptors.type());
+ }
+
+ for (size_t i = 0; i < indices.size(); ++i)
+ {
+     size_t new_idx = indices[i];
+ new_keypoints[i] = keypoints[new_idx];
+ if (!new_descriptors.empty())
+ descriptors.row((int) new_idx).copyTo(new_descriptors.row((int) i));
+ }
+
+ keypoints.swap(new_keypoints);
+ if (_descriptors.needed())
+ new_descriptors.copyTo(_descriptors);
+ }
+ }
SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc opencv_highgui
opencv_ml opencv_video opencv_objdetect opencv_features2d
opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
- opencv_nonfree opencv_softcascade)
- opencv_superres)
++ opencv_nonfree opencv_softcascade opencv_superres)
ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
--- /dev/null
-#include "opencv2/core/core.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/contrib/contrib.hpp"
-#include "opencv2/superres/superres.hpp"
+ #include <iostream>
+ #include <iomanip>
+ #include <string>
- "{ v | video | | Input video }"
- "{ o | output | | Output video }"
- "{ s | scale | 4 | Scale factor }"
- "{ i | iterations | 180 | Iteration count }"
- "{ t | temporal | 4 | Radius of the temporal search area }"
- "{ f | flow | farneback | Optical flow algorithm (farneback, simple, tvl1, brox, pyrlk) }"
- "{ gpu | gpu | false | Use GPU }"
- "{ h | help | false | Print help message }"
++#include "opencv2/core.hpp"
++#include "opencv2/core/utility.hpp"
++#include "opencv2/highgui.hpp"
++#include "opencv2/imgproc.hpp"
++#include "opencv2/contrib.hpp"
++#include "opencv2/superres.hpp"
+ #include "opencv2/superres/optical_flow.hpp"
+
+ using namespace std;
+ using namespace cv;
+ using namespace cv::superres;
+
+ #define MEASURE_TIME(op) \
+ { \
+ TickMeter tm; \
+ tm.start(); \
+ op; \
+ tm.stop(); \
+ cout << tm.getTimeSec() << " sec" << endl; \
+ }
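+ // The braces give the TickMeter local scope, so the macro can be expanded
+ // repeatedly in one function; `op` is evaluated exactly once.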
+
+ static Ptr<DenseOpticalFlowExt> createOptFlow(const string& name, bool useGpu)
+ {
+ if (name == "farneback")
+ {
+ if (useGpu)
+ return createOptFlow_Farneback_GPU();
+ else
+ return createOptFlow_Farneback();
+ }
+ else if (name == "simple")
+ return createOptFlow_Simple();
+ else if (name == "tvl1")
+ {
+ if (useGpu)
+ return createOptFlow_DualTVL1_GPU();
+ else
+ return createOptFlow_DualTVL1();
+ }
+ else if (name == "brox")
+ return createOptFlow_Brox_GPU();
+ else if (name == "pyrlk")
+ return createOptFlow_PyrLK_GPU();
+ else
+ {
+ cerr << "Unknown optical flow algorithm: " << name << endl;
+ exit(-1);
+ }
+
+ return Ptr<DenseOpticalFlowExt>();
+ }
+
+ int main(int argc, const char* argv[])
+ {
+ CommandLineParser cmd(argc, argv,
- cmd.printParams();
++ "{ v video | | Input video }"
++ "{ o output | | Output video }"
++ "{ s scale | 4 | Scale factor }"
++ "{ i iterations | 180 | Iteration count }"
++ "{ t temporal | 4 | Radius of the temporal search area }"
++ "{ f flow | farneback | Optical flow algorithm (farneback, simple, tvl1, brox, pyrlk) }"
++ "{ gpu | false | Use GPU }"
++ "{ h help | false | Print help message }"
+ );
+
+ if (cmd.get<bool>("help"))
+ {
+ cout << "This sample demonstrates Super Resolution algorithms for video sequence" << endl;
++ cmd.printMessage();
+ return 0;
+ }
+
+ const string inputVideoName = cmd.get<string>("video");
+ const string outputVideoName = cmd.get<string>("output");
+ const int scale = cmd.get<int>("scale");
+ const int iterations = cmd.get<int>("iterations");
+ const int temporalAreaRadius = cmd.get<int>("temporal");
+ const string optFlow = cmd.get<string>("flow");
+ const bool useGpu = cmd.get<bool>("gpu");
+
+ Ptr<SuperResolution> superRes;
+ if (useGpu)
+ superRes = createSuperResolution_BTVL1_GPU();
+ else
+ superRes = createSuperResolution_BTVL1();
+
+ superRes->set("scale", scale);
+ superRes->set("iterations", iterations);
+ superRes->set("temporalAreaRadius", temporalAreaRadius);
+ superRes->set("opticalFlow", createOptFlow(optFlow, useGpu));
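+ // The string keys above address members registered through addParam() in
+ // each implementation's CV_INIT_ALGORITHM block (the same reflection
+ // mechanism the optical-flow wrappers use), so no BTVL1-specific setters
+ // are needed.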
+
+ Ptr<FrameSource> frameSource;
+ if (useGpu)
+ {
+ // Try to use GPU video decoding first; fall back to the CPU reader below
+ try
+ {
+ frameSource = createFrameSource_Video_GPU(inputVideoName);
+ Mat frame;
+ frameSource->nextFrame(frame);
+ }
+ catch (const cv::Exception&)
+ {
+ frameSource.release();
+ }
+ }
+ if (frameSource.empty())
+ frameSource = createFrameSource_Video(inputVideoName);
+
+ // skip the first frame; it is usually corrupted
+ {
+ Mat frame;
+ frameSource->nextFrame(frame);
+ cout << "Input : " << inputVideoName << " " << frame.size() << endl;
+ cout << "Scale factor : " << scale << endl;
+ cout << "Iterations : " << iterations << endl;
+ cout << "Temporal radius : " << temporalAreaRadius << endl;
+ cout << "Optical Flow : " << optFlow << endl;
+ cout << "Mode : " << (useGpu ? "GPU" : "CPU") << endl;
+ }
+
+ superRes->setInput(frameSource);
+
+ VideoWriter writer;
+
+ for (int i = 0;; ++i)
+ {
+ cout << '[' << setw(3) << i << "] : ";
+
+ Mat result;
+ MEASURE_TIME(superRes->nextFrame(result));
+
+ if (result.empty())
+ break;
+
+ imshow("Super Resolution", result);
+
+ if (waitKey(1000) > 0)
+ break;
+
+ if (!outputVideoName.empty())
+ {
+ if (!writer.isOpened())
+ writer.open(outputVideoName, CV_FOURCC('X', 'V', 'I', 'D'), 25.0, result.size());
+ writer << result;
+ }
+ }
+
+ return 0;
+ }
#include <cstdio>
#include <vector>
#include <numeric>
-#include "opencv2/core/core.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/calib3d/calib3d.hpp"
-#include "opencv2/video/video.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
-#include "opencv2/objdetect/objdetect.hpp"
-#include "opencv2/features2d/features2d.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/calib3d.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/nonfree.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/features2d.hpp"
#define USE_OPENCL
#ifdef USE_OPENCL
-#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ocl.hpp"
+ #include "opencv2/nonfree/ocl.hpp"
#endif
#define TAB " "