Merge remote-tracking branch 'origin/2.4' into merge-2.4

author Roman Donchenko <roman.donchenko@itseez.com>

Tue, 25 Jun 2013 10:08:23 +0000 (14:08 +0400)

committer Roman Donchenko <roman.donchenko@itseez.com>

Tue, 25 Jun 2013 11:55:52 +0000 (15:55 +0400)
author Roman Donchenko <roman.donchenko@itseez.com>
Tue, 25 Jun 2013 10:08:23 +0000 (14:08 +0400)
committer Roman Donchenko <roman.donchenko@itseez.com>
Tue, 25 Jun 2013 11:55:52 +0000 (15:55 +0400)
diff --cc modules/calib3d/include/opencv2/calib3d.hpp

index 64462ee,0000000..2486eb1

mode 100644,000000..100644
--- 1/modules/calib3d/include/opencv2/calib3d.hpp
--- /dev/null
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@@ -1,416 -1,0 +1,416 @@@
- CV_EXPORTS void computeCorrespondEpilines( InputArray points, int whichImage,
-                                            InputArray F, OutputArray lines );
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                          License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_CALIB3D_HPP__
+ +#define __OPENCV_CALIB3D_HPP__
+ +
+ +#include "opencv2/core.hpp"
+ +#include "opencv2/features2d.hpp"
+ +
+ +namespace cv
+ +{
+ +
+ +//! type of the robust estimation algorithm
+ +enum { LMEDS  = 4, //!< least-median algorithm
+ +       RANSAC = 8  //!< RANSAC algorithm
+ +     };
+ +
+ +enum { ITERATIVE = 0,
+ +       EPNP      = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
+ +       P3P       = 2  // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
+ +     };
+ +
+ +enum { CALIB_CB_ADAPTIVE_THRESH = 1,
+ +       CALIB_CB_NORMALIZE_IMAGE = 2,
+ +       CALIB_CB_FILTER_QUADS    = 4,
+ +       CALIB_CB_FAST_CHECK      = 8
+ +     };
+ +
+ +enum { CALIB_CB_SYMMETRIC_GRID  = 1,
+ +       CALIB_CB_ASYMMETRIC_GRID = 2,
+ +       CALIB_CB_CLUSTERING      = 4
+ +     };
+ +
+ +enum { CALIB_USE_INTRINSIC_GUESS = 0x00001,
+ +       CALIB_FIX_ASPECT_RATIO    = 0x00002,
+ +       CALIB_FIX_PRINCIPAL_POINT = 0x00004,
+ +       CALIB_ZERO_TANGENT_DIST   = 0x00008,
+ +       CALIB_FIX_FOCAL_LENGTH    = 0x00010,
+ +       CALIB_FIX_K1              = 0x00020,
+ +       CALIB_FIX_K2              = 0x00040,
+ +       CALIB_FIX_K3              = 0x00080,
+ +       CALIB_FIX_K4              = 0x00800,
+ +       CALIB_FIX_K5              = 0x01000,
+ +       CALIB_FIX_K6              = 0x02000,
+ +       CALIB_RATIONAL_MODEL      = 0x04000,
+ +       CALIB_THIN_PRISM_MODEL    = 0x08000,
+ +       CALIB_FIX_S1_S2_S3_S4     = 0x10000,
+ +       // only for stereo
+ +       CALIB_FIX_INTRINSIC       = 0x00100,
+ +       CALIB_SAME_FOCAL_LENGTH   = 0x00200,
+ +       // for stereo rectification
+ +       CALIB_ZERO_DISPARITY      = 0x00400
+ +     };
+ +
+ +//! the algorithm for finding fundamental matrix
+ +enum { FM_7POINT = 1, //!< 7-point algorithm
+ +       FM_8POINT = 2, //!< 8-point algorithm
+ +       FM_LMEDS  = 4, //!< least-median algorithm
+ +       FM_RANSAC = 8  //!< RANSAC algorithm
+ +     };
+ +
+ +
+ +
+ +//! converts rotation vector to rotation matrix or vice versa using Rodrigues transformation
+ +CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() );
+ +
+ +//! computes the best-fit perspective transformation mapping srcPoints to dstPoints.
+ +CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints,
+ +                                 int method = 0, double ransacReprojThreshold = 3,
+ +                                 OutputArray mask=noArray());
+ +
+ +//! variant of findHomography for backward compatibility
+ +CV_EXPORTS Mat findHomography( InputArray srcPoints, InputArray dstPoints,
+ +                               OutputArray mask, int method = 0, double ransacReprojThreshold = 3 );
+ +
+ +//! Computes RQ decomposition of 3x3 matrix
+ +CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ,
+ +                                OutputArray Qx = noArray(),
+ +                                OutputArray Qy = noArray(),
+ +                                OutputArray Qz = noArray());
+ +
+ +//! Decomposes the projection matrix into camera matrix and the rotation matrix and the translation vector
+ +CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray cameraMatrix,
+ +                                             OutputArray rotMatrix, OutputArray transVect,
+ +                                             OutputArray rotMatrixX = noArray(),
+ +                                             OutputArray rotMatrixY = noArray(),
+ +                                             OutputArray rotMatrixZ = noArray(),
+ +                                             OutputArray eulerAngles =noArray() );
+ +
+ +//! computes derivatives of the matrix product w.r.t each of the multiplied matrix coefficients
+ +CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB );
+ +
+ +//! composes 2 [R|t] transformations together. Also computes the derivatives of the result w.r.t the arguments
+ +CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1,
+ +                             InputArray rvec2, InputArray tvec2,
+ +                             OutputArray rvec3, OutputArray tvec3,
+ +                             OutputArray dr3dr1 = noArray(), OutputArray dr3dt1 = noArray(),
+ +                             OutputArray dr3dr2 = noArray(), OutputArray dr3dt2 = noArray(),
+ +                             OutputArray dt3dr1 = noArray(), OutputArray dt3dt1 = noArray(),
+ +                             OutputArray dt3dr2 = noArray(), OutputArray dt3dt2 = noArray() );
+ +
+ +//! projects points from the model coordinate space to the image coordinates. Also computes derivatives of the image coordinates w.r.t the intrinsic and extrinsic camera parameters
+ +CV_EXPORTS_W void projectPoints( InputArray objectPoints,
+ +                                 InputArray rvec, InputArray tvec,
+ +                                 InputArray cameraMatrix, InputArray distCoeffs,
+ +                                 OutputArray imagePoints,
+ +                                 OutputArray jacobian = noArray(),
+ +                                 double aspectRatio = 0 );
+ +
+ +//! computes the camera pose from a few 3D points and the corresponding projections. The outliers are not handled.
+ +CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
+ +                            InputArray cameraMatrix, InputArray distCoeffs,
+ +                            OutputArray rvec, OutputArray tvec,
+ +                            bool useExtrinsicGuess = false, int flags = ITERATIVE );
+ +
+ +//! computes the camera pose from a few 3D points and the corresponding projections. The outliers are possible.
+ +CV_EXPORTS_W void solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
+ +                                  InputArray cameraMatrix, InputArray distCoeffs,
+ +                                  OutputArray rvec, OutputArray tvec,
+ +                                  bool useExtrinsicGuess = false, int iterationsCount = 100,
+ +                                  float reprojectionError = 8.0, int minInliersCount = 100,
+ +                                  OutputArray inliers = noArray(), int flags = ITERATIVE );
+ +
+ +//! initializes camera matrix from a few 3D points and the corresponding projections.
+ +CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints,
+ +                                     InputArrayOfArrays imagePoints,
+ +                                     Size imageSize, double aspectRatio = 1.0 );
+ +
+ +//! finds checkerboard pattern of the specified size in the image
+ +CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners,
+ +                                         int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE );
+ +
+ +//! finds subpixel-accurate positions of the chessboard corners
+ +CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
+ +
+ +//! draws the checkerboard pattern (found or partly found) in the image
+ +CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize,
+ +                                         InputArray corners, bool patternWasFound );
+ +
+ +//! finds circles' grid pattern of the specified size in the image
+ +CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
+ +                                   OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID,
+ +                                   const Ptr<FeatureDetector> &blobDetector = new SimpleBlobDetector());
+ +
+ +//! finds intrinsic and extrinsic camera parameters from several views of a known calibration pattern.
+ +CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
+ +                                     InputArrayOfArrays imagePoints, Size imageSize,
+ +                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+ +                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+ +                                     int flags = 0, TermCriteria criteria = TermCriteria(
+ +                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+ +
+ +//! computes several useful camera characteristics from the camera matrix, camera frame resolution and the physical sensor size.
+ +CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSize,
+ +                                           double apertureWidth, double apertureHeight,
+ +                                           CV_OUT double& fovx, CV_OUT double& fovy,
+ +                                           CV_OUT double& focalLength, CV_OUT Point2d& principalPoint,
+ +                                           CV_OUT double& aspectRatio );
+ +
+ +//! finds intrinsic and extrinsic parameters of a stereo camera
+ +CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
+ +                                     InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+ +                                     InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1,
+ +                                     InputOutputArray cameraMatrix2, InputOutputArray distCoeffs2,
+ +                                     Size imageSize, OutputArray R,OutputArray T, OutputArray E, OutputArray F,
+ +                                     TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6),
+ +                                     int flags = CALIB_FIX_INTRINSIC );
+ +
+ +
+ +//! computes the rectification transformation for a stereo camera from its intrinsic and extrinsic parameters
+ +CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs1,
+ +                                 InputArray cameraMatrix2, InputArray distCoeffs2,
+ +                                 Size imageSize, InputArray R, InputArray T,
+ +                                 OutputArray R1, OutputArray R2,
+ +                                 OutputArray P1, OutputArray P2,
+ +                                 OutputArray Q, int flags = CALIB_ZERO_DISPARITY,
+ +                                 double alpha = -1, Size newImageSize = Size(),
+ +                                 CV_OUT Rect* validPixROI1 = 0, CV_OUT Rect* validPixROI2 = 0 );
+ +
+ +//! computes the rectification transformation for an uncalibrated stereo camera (zero distortion is assumed)
+ +CV_EXPORTS_W bool stereoRectifyUncalibrated( InputArray points1, InputArray points2,
+ +                                             InputArray F, Size imgSize,
+ +                                             OutputArray H1, OutputArray H2,
+ +                                             double threshold = 5 );
+ +
+ +//! computes the rectification transformations for 3-head camera, where all the heads are on the same line.
+ +CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distCoeffs1,
+ +                                      InputArray cameraMatrix2, InputArray distCoeffs2,
+ +                                      InputArray cameraMatrix3, InputArray distCoeffs3,
+ +                                      InputArrayOfArrays imgpt1, InputArrayOfArrays imgpt3,
+ +                                      Size imageSize, InputArray R12, InputArray T12,
+ +                                      InputArray R13, InputArray T13,
+ +                                      OutputArray R1, OutputArray R2, OutputArray R3,
+ +                                      OutputArray P1, OutputArray P2, OutputArray P3,
+ +                                      OutputArray Q, double alpha, Size newImgSize,
+ +                                      CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags );
+ +
+ +//! returns the optimal new camera matrix
+ +CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs,
+ +                                            Size imageSize, double alpha, Size newImgSize = Size(),
+ +                                            CV_OUT Rect* validPixROI = 0,
+ +                                            bool centerPrincipalPoint = false);
+ +
+ +//! converts point coordinates from normal pixel coordinates to homogeneous coordinates ((x,y)->(x,y,1))
+ +CV_EXPORTS_W void convertPointsToHomogeneous( InputArray src, OutputArray dst );
+ +
+ +//! converts point coordinates from homogeneous to normal pixel coordinates ((x,y,z)->(x/z, y/z))
+ +CV_EXPORTS_W void convertPointsFromHomogeneous( InputArray src, OutputArray dst );
+ +
+ +//! for backward compatibility
+ +CV_EXPORTS void convertPointsHomogeneous( InputArray src, OutputArray dst );
+ +
+ +//! finds fundamental matrix from a set of corresponding 2D points
+ +CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
+ +                                     int method = FM_RANSAC,
+ +                                     double param1 = 3., double param2 = 0.99,
+ +                                     OutputArray mask = noArray() );
+ +
+ +//! variant of findFundamentalMat for backward compatibility
+ +CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2,
+ +                                   OutputArray mask, int method = FM_RANSAC,
+ +                                   double param1 = 3., double param2 = 0.99 );
+ +
+ +//! finds essential matrix from a set of corresponding 2D points using five-point algorithm
+ +CV_EXPORTS Mat findEssentialMat( InputArray points1, InputArray points2,
+ +                                 double focal = 1.0, Point2d pp = Point2d(0, 0),
+ +                                 int method = RANSAC, double prob = 0.999,
+ +                                 double threshold = 1.0, OutputArray mask = noArray() );
+ +
+ +//! decomposes the essential matrix into two possible rotation matrices and one translation vector
+ +CV_EXPORTS void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
+ +
+ +//! recover relative camera pose from a set of corresponding 2D points
+ +CV_EXPORTS int recoverPose( InputArray E, InputArray points1, InputArray points2,
+ +                            OutputArray R, OutputArray t,
+ +                            double focal = 1.0, Point2d pp = Point2d(0, 0),
+ +                            InputOutputArray mask = noArray() );
+ +
+ +
+ +//! finds coordinates of epipolar lines corresponding to the specified points
++CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
++                                             InputArray F, OutputArray lines );
+ +
+ +CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2,
+ +                                     InputArray projPoints1, InputArray projPoints2,
+ +                                     OutputArray points4D );
+ +
+ +CV_EXPORTS_W void correctMatches( InputArray F, InputArray points1, InputArray points2,
+ +                                  OutputArray newPoints1, OutputArray newPoints2 );
+ +
+ +//! filters off speckles (small regions of incorrectly computed disparity)
+ +CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal,
+ +                                  int maxSpeckleSize, double maxDiff,
+ +                                  InputOutputArray buf = noArray() );
+ +
+ +//! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify())
+ +CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2,
+ +                                        int minDisparity, int numberOfDisparities,
+ +                                        int SADWindowSize );
+ +
+ +//! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm
+ +CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost,
+ +                                     int minDisparity, int numberOfDisparities,
+ +                                     int disp12MaxDisp = 1 );
+ +
+ +//! reprojects disparity image to 3D: (x,y,d)->(X,Y,Z) using the matrix Q returned by cv::stereoRectify
+ +CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity,
+ +                                      OutputArray _3dImage, InputArray Q,
+ +                                      bool handleMissingValues = false,
+ +                                      int ddepth = -1 );
+ +
+ +CV_EXPORTS_W  int estimateAffine3D(InputArray src, InputArray dst,
+ +                                   OutputArray out, OutputArray inliers,
+ +                                   double ransacThreshold = 3, double confidence = 0.99);
+ +
+ +
+ +
+ +class CV_EXPORTS_W StereoMatcher : public Algorithm
+ +{
+ +public:
+ +    enum { DISP_SHIFT = 4,
+ +           DISP_SCALE = (1 << DISP_SHIFT)
+ +         };
+ +
+ +    CV_WRAP virtual void compute( InputArray left, InputArray right,
+ +                                  OutputArray disparity ) = 0;
+ +
+ +    CV_WRAP virtual int getMinDisparity() const = 0;
+ +    CV_WRAP virtual void setMinDisparity(int minDisparity) = 0;
+ +
+ +    CV_WRAP virtual int getNumDisparities() const = 0;
+ +    CV_WRAP virtual void setNumDisparities(int numDisparities) = 0;
+ +
+ +    CV_WRAP virtual int getBlockSize() const = 0;
+ +    CV_WRAP virtual void setBlockSize(int blockSize) = 0;
+ +
+ +    CV_WRAP virtual int getSpeckleWindowSize() const = 0;
+ +    CV_WRAP virtual void setSpeckleWindowSize(int speckleWindowSize) = 0;
+ +
+ +    CV_WRAP virtual int getSpeckleRange() const = 0;
+ +    CV_WRAP virtual void setSpeckleRange(int speckleRange) = 0;
+ +
+ +    CV_WRAP virtual int getDisp12MaxDiff() const = 0;
+ +    CV_WRAP virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0;
+ +};
+ +
+ +
+ +
+ +class CV_EXPORTS_W StereoBM : public StereoMatcher
+ +{
+ +public:
+ +    enum { PREFILTER_NORMALIZED_RESPONSE = 0,
+ +           PREFILTER_XSOBEL              = 1
+ +         };
+ +
+ +    CV_WRAP virtual int getPreFilterType() const = 0;
+ +    CV_WRAP virtual void setPreFilterType(int preFilterType) = 0;
+ +
+ +    CV_WRAP virtual int getPreFilterSize() const = 0;
+ +    CV_WRAP virtual void setPreFilterSize(int preFilterSize) = 0;
+ +
+ +    CV_WRAP virtual int getPreFilterCap() const = 0;
+ +    CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0;
+ +
+ +    CV_WRAP virtual int getTextureThreshold() const = 0;
+ +    CV_WRAP virtual void setTextureThreshold(int textureThreshold) = 0;
+ +
+ +    CV_WRAP virtual int getUniquenessRatio() const = 0;
+ +    CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0;
+ +
+ +    CV_WRAP virtual int getSmallerBlockSize() const = 0;
+ +    CV_WRAP virtual void setSmallerBlockSize(int blockSize) = 0;
+ +
+ +    CV_WRAP virtual Rect getROI1() const = 0;
+ +    CV_WRAP virtual void setROI1(Rect roi1) = 0;
+ +
+ +    CV_WRAP virtual Rect getROI2() const = 0;
+ +    CV_WRAP virtual void setROI2(Rect roi2) = 0;
+ +};
+ +
+ +CV_EXPORTS_W Ptr<StereoBM> createStereoBM(int numDisparities = 0, int blockSize = 21);
+ +
+ +
+ +class CV_EXPORTS_W StereoSGBM : public StereoMatcher
+ +{
+ +public:
+ +    enum { MODE_SGBM = 0,
+ +           MODE_HH   = 1
+ +         };
+ +
+ +    CV_WRAP virtual int getPreFilterCap() const = 0;
+ +    CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0;
+ +
+ +    CV_WRAP virtual int getUniquenessRatio() const = 0;
+ +    CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0;
+ +
+ +    CV_WRAP virtual int getP1() const = 0;
+ +    CV_WRAP virtual void setP1(int P1) = 0;
+ +
+ +    CV_WRAP virtual int getP2() const = 0;
+ +    CV_WRAP virtual void setP2(int P2) = 0;
+ +
+ +    CV_WRAP virtual int getMode() const = 0;
+ +    CV_WRAP virtual void setMode(int mode) = 0;
+ +};
+ +
+ +
+ +CV_EXPORTS_W Ptr<StereoSGBM> createStereoSGBM(int minDisparity, int numDisparities, int blockSize,
+ +                                            int P1 = 0, int P2 = 0, int disp12MaxDiff = 0,
+ +                                            int preFilterCap = 0, int uniquenessRatio = 0,
+ +                                            int speckleWindowSize = 0, int speckleRange = 0,
+ +                                            int mode = StereoSGBM::MODE_SGBM);
+ +
+ +} // cv
+ +
+ +#endif
diff --cc modules/core/include/opencv2/core/cuda/limits.hpp

index 4b265da,0000000..0439de7

mode 100644,000000..100644
--- 1/modules/core/include/opencv2/core/cuda/limits.hpp
--- /dev/null
+++ b/modules/core/include/opencv2/core/cuda/limits.hpp
@@@ -1,235 -1,0 +1,122 @@@
- #include <limits>
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                           License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
+ +#define __OPENCV_GPU_LIMITS_GPU_HPP__
+ +
-     template<class T> struct numeric_limits
-     {
-         typedef T type;
-         __device__ __forceinline__ static type min()  { return type(); };
-         __device__ __forceinline__ static type max() { return type(); };
-         __device__ __forceinline__ static type epsilon() { return type(); }
-         __device__ __forceinline__ static type round_error() { return type(); }
-         __device__ __forceinline__ static type denorm_min()  { return type(); }
-         __device__ __forceinline__ static type infinity() { return type(); }
-         __device__ __forceinline__ static type quiet_NaN() { return type(); }
-         __device__ __forceinline__ static type signaling_NaN() { return T(); }
-         static const bool is_signed;
-     };
++#include <limits.h>
++#include <float.h>
+ +#include "common.hpp"
+ +
+ +namespace cv { namespace gpu { namespace cudev
+ +{
-     template<> struct numeric_limits<bool>
-     {
-         typedef bool type;
-         __device__ __forceinline__ static type min() { return false; };
-         __device__ __forceinline__ static type max() { return true;  };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = false;
-     };
+ +
-     template<> struct numeric_limits<char>
-     {
-         typedef char type;
-         __device__ __forceinline__ static type min() { return CHAR_MIN; };
-         __device__ __forceinline__ static type max() { return CHAR_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = (char)-1 == -1;
-     };
- 
-     template<> struct numeric_limits<signed char>
-     {
-         typedef char type;
-         __device__ __forceinline__ static type min() { return SCHAR_MIN; };
-         __device__ __forceinline__ static type max() { return SCHAR_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = (signed char)-1 == -1;
-     };
- 
-     template<> struct numeric_limits<unsigned char>
-     {
-         typedef unsigned char type;
-         __device__ __forceinline__ static type min() { return 0; };
-         __device__ __forceinline__ static type max() { return UCHAR_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = false;
-     };
++template <class T> struct numeric_limits;
+ +
-     template<> struct numeric_limits<short>
-     {
-         typedef short type;
-         __device__ __forceinline__ static type min() { return SHRT_MIN; };
-         __device__ __forceinline__ static type max() { return SHRT_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = true;
-     };
++template <> struct numeric_limits<bool>
++{
++    __device__ __forceinline__ static bool min() { return false; }
++    __device__ __forceinline__ static bool max() { return true;  }
++    static const bool is_signed = false;
++};
+ +
-     template<> struct numeric_limits<unsigned short>
-     {
-         typedef unsigned short type;
-         __device__ __forceinline__ static type min() { return 0; };
-         __device__ __forceinline__ static type max() { return USHRT_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = false;
-     };
++template <> struct numeric_limits<signed char>
++{
++    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
++    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
++    static const bool is_signed = true;
++};
+ +
-     template<> struct numeric_limits<int>
-     {
-         typedef int type;
-         __device__ __forceinline__ static type min() { return INT_MIN; };
-         __device__ __forceinline__ static type max() { return INT_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = true;
-     };
++template <> struct numeric_limits<unsigned char>
++{
++    __device__ __forceinline__ static unsigned char min() { return 0; }
++    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
++    static const bool is_signed = false;
++};
+ +
-     template<> struct numeric_limits<unsigned int>
-     {
-         typedef unsigned int type;
-         __device__ __forceinline__ static type min() { return 0; };
-         __device__ __forceinline__ static type max() { return UINT_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = false;
-     };
++template <> struct numeric_limits<short>
++{
++    __device__ __forceinline__ static short min() { return SHRT_MIN; }
++    __device__ __forceinline__ static short max() { return SHRT_MAX; }
++    static const bool is_signed = true;
++};
+ +
++template <> struct numeric_limits<unsigned short>
++{
++    __device__ __forceinline__ static unsigned short min() { return 0; }
++    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
++    static const bool is_signed = false;
++};
+ +
-     template<> struct numeric_limits<long>
-     {
-         typedef long type;
-         __device__ __forceinline__ static type min() { return LONG_MIN; };
-         __device__ __forceinline__ static type max() { return LONG_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = true;
-     };
++template <> struct numeric_limits<int>
++{
++    __device__ __forceinline__ static int min() { return INT_MIN; }
++    __device__ __forceinline__ static int max() { return INT_MAX; }
++    static const bool is_signed = true;
++};
+ +
-     template<> struct numeric_limits<unsigned long>
-     {
-         typedef unsigned long type;
-         __device__ __forceinline__ static type min() { return 0; };
-         __device__ __forceinline__ static type max() { return ULONG_MAX; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = false;
-     };
++template <> struct numeric_limits<unsigned int>
++{
++    __device__ __forceinline__ static unsigned int min() { return 0; }
++    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
++    static const bool is_signed = false;
++};
+ +
-     template<> struct numeric_limits<float>
-     {
-         typedef float type;
-         __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
-         __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
-         __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = true;
-     };
++template <> struct numeric_limits<float>
++{
++    __device__ __forceinline__ static float min() { return FLT_MIN; }
++    __device__ __forceinline__ static float max() { return FLT_MAX; }
++    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
++    static const bool is_signed = true;
++};
+ +
-     template<> struct numeric_limits<double>
-     {
-         typedef double type;
-         __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
-         __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
-         __device__ __forceinline__ static type epsilon();
-         __device__ __forceinline__ static type round_error();
-         __device__ __forceinline__ static type denorm_min();
-         __device__ __forceinline__ static type infinity();
-         __device__ __forceinline__ static type quiet_NaN();
-         __device__ __forceinline__ static type signaling_NaN();
-         static const bool is_signed = true;
-     };
++template <> struct numeric_limits<double>
++{
++    __device__ __forceinline__ static double min() { return DBL_MIN; }
++    __device__ __forceinline__ static double max() { return DBL_MAX; }
++    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
++    static const bool is_signed = true;
++};
+ +
+ +}}} // namespace cv { namespace gpu { namespace cudev {
+ +
+ +#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
diff --cc modules/core/include/opencv2/core/mat.hpp

index 8c9b10c,f798d7f..b1162a1
--- 1/modules/core/include/opencv2/core/mat.hpp
--- 2/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@@ -41,1811 -40,1131 +41,1812 @@@
   //
   //M*/
   
- -#ifndef __OPENCV_CORE_MATRIX_OPERATIONS_HPP__
- -#define __OPENCV_CORE_MATRIX_OPERATIONS_HPP__
+ +#ifndef __OPENCV_CORE_MAT_HPP__
+ +#define __OPENCV_CORE_MAT_HPP__
   
- -#ifndef SKIP_INCLUDES
- -#include <limits.h>
- -#include <string.h>
- -#endif // SKIP_INCLUDES
- -
- -#ifdef __cplusplus
- -
- -namespace cv
- -{
+ +#ifndef __cplusplus
+ +#  error mat.hpp header must be compiled as C++
+ +#endif
   
- -//////////////////////////////// Mat ////////////////////////////////
+ +#include "opencv2/core/matx.hpp"
+ +#include "opencv2/core/types.hpp"
   
- -inline void Mat::initEmpty()
- -{
- -    flags = MAGIC_VAL;
- -    dims = rows = cols = 0;
- -    data = datastart = dataend = datalimit = 0;
- -    refcount = 0;
- -    allocator = 0;
- -}
- -
- -inline Mat::Mat() : size(&rows)
- -{
- -    initEmpty();
- -}
   
- -inline Mat::Mat(int _rows, int _cols, int _type) : size(&rows)
+ +namespace cv
   {
- -    initEmpty();
- -    create(_rows, _cols, _type);
- -}
   
- -inline Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s) : size(&rows)
- -{
- -    initEmpty();
- -    create(_rows, _cols, _type);
- -    *this = _s;
- -}
+ +//////////////////////// Input/Output Array Arguments /////////////////////////////////
   
- -inline Mat::Mat(Size _sz, int _type) : size(&rows)
+ +/*!
+ + Proxy datatype for passing Mat's and vector<>'s as input parameters
+ + */
+ +class CV_EXPORTS _InputArray
   {
- -    initEmpty();
- -    create( _sz.height, _sz.width, _type );
- -}
+ +public:
+ +    enum {
+ +        KIND_SHIFT = 16,
+ +        FIXED_TYPE = 0x8000 << KIND_SHIFT,
+ +        FIXED_SIZE = 0x4000 << KIND_SHIFT,
+ +        KIND_MASK = ~(FIXED_TYPE|FIXED_SIZE) - (1 << KIND_SHIFT) + 1,
+ +
+ +        NONE              = 0 << KIND_SHIFT,
+ +        MAT               = 1 << KIND_SHIFT,
+ +        MATX              = 2 << KIND_SHIFT,
+ +        STD_VECTOR        = 3 << KIND_SHIFT,
+ +        STD_VECTOR_VECTOR = 4 << KIND_SHIFT,
+ +        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
+ +        EXPR              = 6 << KIND_SHIFT,
+ +        OPENGL_BUFFER     = 7 << KIND_SHIFT,
+ +        CUDA_MEM          = 8 << KIND_SHIFT,
-         GPU_MAT           = 9 << KIND_SHIFT
++        GPU_MAT           = 9 << KIND_SHIFT,
++        OCL_MAT           =10 << KIND_SHIFT
+ +    };
+ +
+ +    _InputArray();
+ +    _InputArray(const Mat& m);
+ +    _InputArray(const MatExpr& expr);
+ +    _InputArray(const std::vector<Mat>& vec);
+ +    template<typename _Tp> _InputArray(const Mat_<_Tp>& m);
+ +    template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
+ +    template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
+ +    template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
+ +    template<typename _Tp> _InputArray(const _Tp* vec, int n);
+ +    template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
+ +    _InputArray(const double& val);
+ +    _InputArray(const gpu::GpuMat& d_mat);
+ +    _InputArray(const ogl::Buffer& buf);
+ +    _InputArray(const gpu::CudaMem& cuda_mem);
+ +
+ +    virtual Mat getMat(int i=-1) const;
+ +    virtual void getMatVector(std::vector<Mat>& mv) const;
+ +    virtual gpu::GpuMat getGpuMat() const;
+ +    virtual ogl::Buffer getOGlBuffer() const;
+ +
+ +    virtual int kind() const;
+ +    virtual Size size(int i=-1) const;
+ +    virtual size_t total(int i=-1) const;
+ +    virtual int type(int i=-1) const;
+ +    virtual int depth(int i=-1) const;
+ +    virtual int channels(int i=-1) const;
+ +    virtual bool empty() const;
+ +
+ +    virtual ~_InputArray();
   
- -inline Mat::Mat(Size _sz, int _type, const Scalar& _s) : size(&rows)
- -{
- -    initEmpty();
- -    create(_sz.height, _sz.width, _type);
- -    *this = _s;
- -}
+ +    int flags;
+ +    void* obj;
+ +    Size sz;
+ +};
   
- -inline Mat::Mat(int _dims, const int* _sz, int _type) : size(&rows)
- -{
- -    initEmpty();
- -    create(_dims, _sz, _type);
- -}
   
- -inline Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s) : size(&rows)
- -{
- -    initEmpty();
- -    create(_dims, _sz, _type);
- -    *this = _s;
- -}
- -
- -inline Mat::Mat(const Mat& m)
- -    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
- -    refcount(m.refcount), datastart(m.datastart), dataend(m.dataend),
- -    datalimit(m.datalimit), allocator(m.allocator), size(&rows)
- -{
- -    if( refcount )
- -        CV_XADD(refcount, 1);
- -    if( m.dims <= 2 )
- -    {
- -        step[0] = m.step[0]; step[1] = m.step[1];
- -    }
- -    else
- -    {
- -        dims = 0;
- -        copySize(m);
- -    }
- -}
- -
- -inline Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
- -    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
- -    data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0),
- -    datalimit(0), allocator(0), size(&rows)
- -{
- -    size_t esz = CV_ELEM_SIZE(_type), minstep = cols*esz;
- -    if( _step == AUTO_STEP )
- -    {
- -        _step = minstep;
- -        flags |= CONTINUOUS_FLAG;
- -    }
- -    else
- -    {
- -        if( rows == 1 ) _step = minstep;
- -        CV_DbgAssert( _step >= minstep );
- -        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
- -    }
- -    step[0] = _step; step[1] = esz;
- -    datalimit = datastart + _step*rows;
- -    dataend = datalimit - _step + minstep;
- -}
- -
- -inline Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
- -    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
- -    data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0),
- -    datalimit(0), allocator(0), size(&rows)
- -{
- -    size_t esz = CV_ELEM_SIZE(_type), minstep = cols*esz;
- -    if( _step == AUTO_STEP )
- -    {
- -        _step = minstep;
- -        flags |= CONTINUOUS_FLAG;
- -    }
- -    else
- -    {
- -        if( rows == 1 ) _step = minstep;
- -        CV_DbgAssert( _step >= minstep );
- -        flags |= _step == minstep ? CONTINUOUS_FLAG : 0;
- -    }
- -    step[0] = _step; step[1] = esz;
- -    datalimit = datastart + _step*rows;
- -    dataend = datalimit - _step + minstep;
- -}
- -
- -
- -template<typename _Tp> inline Mat::Mat(const vector<_Tp>& vec, bool copyData)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(2), rows((int)vec.size()), cols(1), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
+ +/*!
+ + Proxy datatype for passing Mat's and vector<>'s as output parameters
+ + */
+ +class CV_EXPORTS _OutputArray : public _InputArray
   {
- -    if(vec.empty())
- -        return;
- -    if( !copyData )
- -    {
- -        step[0] = step[1] = sizeof(_Tp);
- -        data = datastart = (uchar*)&vec[0];
- -        datalimit = dataend = datastart + rows*step[0];
- -    }
- -    else
- -        Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
- -}
- -
- -
- -template<typename _Tp, int n> inline Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(2), rows(n), cols(1), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
- -{
- -    if( !copyData )
- -    {
- -        step[0] = step[1] = sizeof(_Tp);
- -        data = datastart = (uchar*)vec.val;
- -        datalimit = dataend = datastart + rows*step[0];
- -    }
- -    else
- -        Mat(n, 1, DataType<_Tp>::type, (void*)vec.val).copyTo(*this);
- -}
- -
- -
- -template<typename _Tp, int m, int n> inline Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(2), rows(m), cols(n), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
- -{
- -    if( !copyData )
- -    {
- -        step[0] = cols*sizeof(_Tp);
- -        step[1] = sizeof(_Tp);
- -        data = datastart = (uchar*)M.val;
- -        datalimit = dataend = datastart + rows*step[0];
- -    }
- -    else
- -        Mat(m, n, DataType<_Tp>::type, (uchar*)M.val).copyTo(*this);
- -}
- -
- -
- -template<typename _Tp> inline Mat::Mat(const Point_<_Tp>& pt, bool copyData)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(2), rows(2), cols(1), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
- -{
- -    if( !copyData )
- -    {
- -        step[0] = step[1] = sizeof(_Tp);
- -        data = datastart = (uchar*)&pt.x;
- -        datalimit = dataend = datastart + rows*step[0];
- -    }
- -    else
+ +public:
+ +    enum
       {
- -        create(2, 1, DataType<_Tp>::type);
- -        ((_Tp*)data)[0] = pt.x;
- -        ((_Tp*)data)[1] = pt.y;
- -    }
- -}
- -
+ +        DEPTH_MASK_8U = 1 << CV_8U,
+ +        DEPTH_MASK_8S = 1 << CV_8S,
+ +        DEPTH_MASK_16U = 1 << CV_16U,
+ +        DEPTH_MASK_16S = 1 << CV_16S,
+ +        DEPTH_MASK_32S = 1 << CV_32S,
+ +        DEPTH_MASK_32F = 1 << CV_32F,
+ +        DEPTH_MASK_64F = 1 << CV_64F,
+ +        DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
+ +        DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
+ +        DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
+ +    };
+ +
+ +    _OutputArray();
+ +    _OutputArray(Mat& m);
+ +    _OutputArray(std::vector<Mat>& vec);
+ +    _OutputArray(gpu::GpuMat& d_mat);
+ +    _OutputArray(ogl::Buffer& buf);
+ +    _OutputArray(gpu::CudaMem& cuda_mem);
+ +    template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
+ +    template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
+ +    template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
+ +    template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
+ +    template<typename _Tp> _OutputArray(_Tp* vec, int n);
+ +    template<typename _Tp, int m, int n> _OutputArray(Matx<_Tp, m, n>& matx);
+ +
+ +    _OutputArray(const Mat& m);
+ +    _OutputArray(const std::vector<Mat>& vec);
+ +    _OutputArray(const gpu::GpuMat& d_mat);
+ +    _OutputArray(const ogl::Buffer& buf);
+ +    _OutputArray(const gpu::CudaMem& cuda_mem);
+ +    template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
+ +    template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
+ +    template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
+ +    template<typename _Tp> _OutputArray(const Mat_<_Tp>& m);
+ +    template<typename _Tp> _OutputArray(const _Tp* vec, int n);
+ +    template<typename _Tp, int m, int n> _OutputArray(const Matx<_Tp, m, n>& matx);
+ +
+ +    virtual bool fixedSize() const;
+ +    virtual bool fixedType() const;
+ +    virtual bool needed() const;
+ +    virtual Mat& getMatRef(int i=-1) const;
+ +    virtual gpu::GpuMat& getGpuMatRef() const;
+ +    virtual ogl::Buffer& getOGlBufferRef() const;
+ +    virtual gpu::CudaMem& getCudaMemRef() const;
+ +    virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+ +    virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+ +    virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+ +    virtual void release() const;
+ +    virtual void clear() const;
+ +
+ +    virtual ~_OutputArray();
+ +};
   
- -template<typename _Tp> inline Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(2), rows(3), cols(1), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
- -{
- -    if( !copyData )
- -    {
- -        step[0] = step[1] = sizeof(_Tp);
- -        data = datastart = (uchar*)&pt.x;
- -        datalimit = dataend = datastart + rows*step[0];
- -    }
- -    else
- -    {
- -        create(3, 1, DataType<_Tp>::type);
- -        ((_Tp*)data)[0] = pt.x;
- -        ((_Tp*)data)[1] = pt.y;
- -        ((_Tp*)data)[2] = pt.z;
- -    }
- -}
- -
- -
- -template<typename _Tp> inline Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
- -    : flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
- -    dims(0), rows(0), cols(0), data(0), refcount(0),
- -    datastart(0), dataend(0), allocator(0), size(&rows)
- -{
- -    *this = *commaInitializer;
- -}
+ +typedef const _InputArray& InputArray;
+ +typedef InputArray InputArrayOfArrays;
+ +typedef const _OutputArray& OutputArray;
+ +typedef OutputArray OutputArrayOfArrays;
+ +typedef OutputArray InputOutputArray;
+ +typedef OutputArray InputOutputArrayOfArrays;
   
- -inline Mat::~Mat()
- -{
- -    release();
- -    if( step.p != step.buf )
- -        fastFree(step.p);
- -}
+ +CV_EXPORTS OutputArray noArray();
   
- -inline Mat& Mat::operator = (const Mat& m)
- -{
- -    if( this != &m )
- -    {
- -        if( m.refcount )
- -            CV_XADD(m.refcount, 1);
- -        release();
- -        flags = m.flags;
- -        if( dims <= 2 && m.dims <= 2 )
- -        {
- -            dims = m.dims;
- -            rows = m.rows;
- -            cols = m.cols;
- -            step[0] = m.step[0];
- -            step[1] = m.step[1];
- -        }
- -        else
- -            copySize(m);
- -        data = m.data;
- -        datastart = m.datastart;
- -        dataend = m.dataend;
- -        datalimit = m.datalimit;
- -        refcount = m.refcount;
- -        allocator = m.allocator;
- -    }
- -    return *this;
- -}
- -
- -inline Mat Mat::row(int y) const { return Mat(*this, Range(y, y+1), Range::all()); }
- -inline Mat Mat::col(int x) const { return Mat(*this, Range::all(), Range(x, x+1)); }
- -inline Mat Mat::rowRange(int startrow, int endrow) const
- -    { return Mat(*this, Range(startrow, endrow), Range::all()); }
- -inline Mat Mat::rowRange(const Range& r) const
- -    { return Mat(*this, r, Range::all()); }
- -inline Mat Mat::colRange(int startcol, int endcol) const
- -    { return Mat(*this, Range::all(), Range(startcol, endcol)); }
- -inline Mat Mat::colRange(const Range& r) const
- -    { return Mat(*this, Range::all(), r); }
- -
- -inline Mat Mat::diag(const Mat& d)
- -{
- -    CV_Assert( d.cols == 1 || d.rows == 1 );
- -    int len = d.rows + d.cols - 1;
- -    Mat m(len, len, d.type(), Scalar(0)), md = m.diag();
- -    if( d.cols == 1 )
- -        d.copyTo(md);
- -    else
- -        transpose(d, md);
- -    return m;
- -}
- -
- -inline Mat Mat::clone() const
- -{
- -    Mat m;
- -    copyTo(m);
- -    return m;
- -}
   
- -inline void Mat::assignTo( Mat& m, int _type ) const
- -{
- -    if( _type < 0 )
- -        m = *this;
- -    else
- -        convertTo(m, _type);
- -}
   
- -inline void Mat::create(int _rows, int _cols, int _type)
- -{
- -    _type &= TYPE_MASK;
- -    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data )
- -        return;
- -    int sz[] = {_rows, _cols};
- -    create(2, sz, _type);
- -}
- -
- -inline void Mat::create(Size _sz, int _type)
- -{
- -    create(_sz.height, _sz.width, _type);
- -}
+ +/////////////////////////////////// MatAllocator //////////////////////////////////////
   
- -inline void Mat::addref()
- -{ if( refcount ) CV_XADD(refcount, 1); }
+ +/*!
+ +   Custom array allocator
   
- -inline void Mat::release()
+ +*/
+ +class CV_EXPORTS MatAllocator
   {
- -    if( refcount && CV_XADD(refcount, -1) == 1 )
- -        deallocate();
- -    data = datastart = dataend = datalimit = 0;
- -    size.p[0] = 0;
- -    refcount = 0;
- -}
- -
- -inline Mat Mat::operator()( Range _rowRange, Range _colRange ) const
- -{
- -    return Mat(*this, _rowRange, _colRange);
- -}
- -
- -inline Mat Mat::operator()( const Rect& roi ) const
- -{ return Mat(*this, roi); }
+ +public:
+ +    MatAllocator() {}
+ +    virtual ~MatAllocator() {}
+ +    virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
+ +                          uchar*& datastart, uchar*& data, size_t* step) = 0;
+ +    virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
+ +};
   
- -inline Mat Mat::operator()(const Range* ranges) const
- -{
- -    return Mat(*this, ranges);
- -}
   
- -inline Mat::operator CvMat() const
- -{
- -    CV_DbgAssert(dims <= 2);
- -    CvMat m = cvMat(rows, dims == 1 ? 1 : cols, type(), data);
- -    m.step = (int)step[0];
- -    m.type = (m.type & ~CONTINUOUS_FLAG) | (flags & CONTINUOUS_FLAG);
- -    return m;
- -}
- -
- -inline bool Mat::isContinuous() const { return (flags & CONTINUOUS_FLAG) != 0; }
- -inline bool Mat::isSubmatrix() const { return (flags & SUBMATRIX_FLAG) != 0; }
- -inline size_t Mat::elemSize() const { return dims > 0 ? step.p[dims-1] : 0; }
- -inline size_t Mat::elemSize1() const { return CV_ELEM_SIZE1(flags); }
- -inline int Mat::type() const { return CV_MAT_TYPE(flags); }
- -inline int Mat::depth() const { return CV_MAT_DEPTH(flags); }
- -inline int Mat::channels() const { return CV_MAT_CN(flags); }
- -inline size_t Mat::step1(int i) const { return step.p[i]/elemSize1(); }
- -inline bool Mat::empty() const { return data == 0 || total() == 0; }
- -inline size_t Mat::total() const
- -{
- -    if( dims <= 2 )
- -        return (size_t)rows*cols;
- -    size_t p = 1;
- -    for( int i = 0; i < dims; i++ )
- -        p *= size[i];
- -    return p;
- -}
- -
- -inline uchar* Mat::ptr(int y)
- -{
- -    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
- -    return data + step.p[0]*y;
- -}
   
- -inline const uchar* Mat::ptr(int y) const
- -{
- -    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
- -    return data + step.p[0]*y;
- -}
+ +//////////////////////////////// MatCommaInitializer //////////////////////////////////
   
- -template<typename _Tp> inline _Tp* Mat::ptr(int y)
- -{
- -    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
- -    return (_Tp*)(data + step.p[0]*y);
- -}
+ +/*!
+ + Comma-separated Matrix Initializer
   
- -template<typename _Tp> inline const _Tp* Mat::ptr(int y) const
- -{
- -    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
- -    return (const _Tp*)(data + step.p[0]*y);
- -}
+ + The class instances are usually not created explicitly.
+ + Instead, they are created by the "matrix << firstValue" operator.
   
+ + The sample below initializes 2x2 rotation matrix:
   
- -inline uchar* Mat::ptr(int i0, int i1)
+ + \code
+ + double angle = 30, a = cos(angle*CV_PI/180), b = sin(angle*CV_PI/180);
+ + Mat R = (Mat_<double>(2,2) << a, -b, b, a);
+ + \endcode
+ +*/
+ +template<typename _Tp> class MatCommaInitializer_
   {
- -    CV_DbgAssert( dims >= 2 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] );
- -    return data + i0*step.p[0] + i1*step.p[1];
- -}
+ +public:
+ +    //! the constructor, created by "matrix << firstValue" operator, where matrix is cv::Mat
+ +    MatCommaInitializer_(Mat_<_Tp>* _m);
+ +    //! the operator that takes the next value and puts it into the matrix
+ +    template<typename T2> MatCommaInitializer_<_Tp>& operator , (T2 v);
+ +    //! another form of conversion operator
+ +    operator Mat_<_Tp>() const;
+ +protected:
+ +    MatIterator_<_Tp> it;
+ +};
   
- -inline const uchar* Mat::ptr(int i0, int i1) const
- -{
- -    CV_DbgAssert( dims >= 2 && data &&
- -                 (unsigned)i0 < (unsigned)size.p[0] &&
- -                 (unsigned)i1 < (unsigned)size.p[1] );
- -    return data + i0*step.p[0] + i1*step.p[1];
- -}
   
- -template<typename _Tp> inline _Tp* Mat::ptr(int i0, int i1)
- -{
- -    CV_DbgAssert( dims >= 2 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] );
- -    return (_Tp*)(data + i0*step.p[0] + i1*step.p[1]);
- -}
   
- -template<typename _Tp> inline const _Tp* Mat::ptr(int i0, int i1) const
- -{
- -    CV_DbgAssert( dims >= 2 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] );
- -    return (const _Tp*)(data + i0*step.p[0] + i1*step.p[1]);
- -}
   
- -inline uchar* Mat::ptr(int i0, int i1, int i2)
- -{
- -    CV_DbgAssert( dims >= 3 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  (unsigned)i2 < (unsigned)size.p[2] );
- -    return data + i0*step.p[0] + i1*step.p[1] + i2*step.p[2];
- -}
- -
- -inline const uchar* Mat::ptr(int i0, int i1, int i2) const
- -{
- -    CV_DbgAssert( dims >= 3 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  (unsigned)i2 < (unsigned)size.p[2] );
- -    return data + i0*step.p[0] + i1*step.p[1] + i2*step.p[2];
- -}
- -
- -template<typename _Tp> inline _Tp* Mat::ptr(int i0, int i1, int i2)
+ +/////////////////////////////////////// Mat ///////////////////////////////////////////
+ +
+ +/*!
+ +   The n-dimensional matrix class.
+ +
+ +   The class represents an n-dimensional dense numerical array that can act as
+ +   a matrix, image, optical flow map, 3-focal tensor etc.
+ +   It is very similar to CvMat and CvMatND types from earlier versions of OpenCV,
+ +   and similarly to those types, the matrix can be multi-channel. It also fully supports ROI mechanism.
+ +
+ +   There are many different ways to create a cv::Mat object. Here are some popular ones:
+ +   <ul>
+ +   <li> using the cv::Mat::create(nrows, ncols, type) method or
+ +     the similar cv::Mat::Mat(nrows, ncols, type[, fill_value]) constructor.
+ +     A new matrix of the specified size and specified type will be allocated.
+ +     "type" has the same meaning as in cvCreateMat function,
+ +     e.g. CV_8UC1 means 8-bit single-channel matrix, CV_32FC2 means 2-channel (i.e. complex)
+ +     floating-point matrix etc:
+ +
+ +     \code
+ +     // make 7x7 complex matrix filled with 1+3j.
+ +     cv::Mat M(7,7,CV_32FC2,Scalar(1,3));
+ +     // and now turn M to 100x60 15-channel 8-bit matrix.
+ +     // The old content will be deallocated
+ +     M.create(100,60,CV_8UC(15));
+ +     \endcode
+ +
+ +     As noted in the introduction of this chapter, Mat::create()
+ +     will only allocate a new matrix when the current matrix dimensionality
+ +     or type are different from the specified.
+ +
+ +   <li> by using a copy constructor or assignment operator, where on the right side it can
+ +     be a matrix or expression, see below. Again, as noted in the introduction,
+ +     matrix assignment is O(1) operation because it only copies the header
+ +     and increases the reference counter. cv::Mat::clone() method can be used to get a full
+ +     (a.k.a. deep) copy of the matrix when you need it.
+ +
+ +   <li> by constructing a header for a part of another matrix. It can be a single row, single column,
+ +     several rows, several columns, rectangular region in the matrix (called a minor in algebra) or
+ +     a diagonal. Such operations are also O(1), because the new header will reference the same data.
+ +     You can actually modify a part of the matrix using this feature, e.g.
+ +
+ +     \code
+ +     // add 5-th row, multiplied by 3 to the 3rd row
+ +     M.row(3) = M.row(3) + M.row(5)*3;
+ +
+ +     // now copy 7-th column to the 1-st column
+ +     // M.col(1) = M.col(7); // this will not work
+ +     Mat M1 = M.col(1);
+ +     M.col(7).copyTo(M1);
+ +
+ +     // create new 320x240 image
+ +     cv::Mat img(Size(320,240),CV_8UC3);
+ +     // select a roi
+ +     cv::Mat roi(img, Rect(10,10,100,100));
+ +     // fill the ROI with (0,255,0) (which is green in RGB space);
+ +     // the original 320x240 image will be modified
+ +     roi = Scalar(0,255,0);
+ +     \endcode
+ +
+ +     Thanks to the additional cv::Mat::datastart and cv::Mat::dataend members, it is possible to
+ +     compute the relative sub-matrix position in the main "container" matrix using cv::Mat::locateROI():
+ +
+ +     \code
+ +     Mat A = Mat::eye(10, 10, CV_32S);
+ +     // extracts A columns, 1 (inclusive) to 3 (exclusive).
+ +     Mat B = A(Range::all(), Range(1, 3));
+ +     // extracts B rows, 5 (inclusive) to 9 (exclusive).
+ +     // that is, C ~ A(Range(5, 9), Range(1, 3))
+ +     Mat C = B(Range(5, 9), Range::all());
+ +     Size size; Point ofs;
+ +     C.locateROI(size, ofs);
+ +     // size will be (width=10,height=10) and the ofs will be (x=1, y=5)
+ +     \endcode
+ +
+ +     As in the case of whole matrices, if you need a deep copy, use cv::Mat::clone() method
+ +     of the extracted sub-matrices.
+ +
+ +   <li> by making a header for user-allocated-data. It can be useful for
+ +      <ol>
+ +      <li> processing "foreign" data using OpenCV (e.g. when you implement
+ +         a DirectShow filter or a processing module for gstreamer etc.), e.g.
+ +
+ +         \code
+ +         void process_video_frame(const unsigned char* pixels,
+ +                                  int width, int height, int step)
+ +         {
+ +            cv::Mat img(height, width, CV_8UC3, pixels, step);
+ +            cv::GaussianBlur(img, img, cv::Size(7,7), 1.5, 1.5);
+ +         }
+ +         \endcode
+ +
+ +      <li> for quick initialization of small matrices and/or super-fast element access
+ +
+ +         \code
+ +         double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}};
+ +         cv::Mat M = cv::Mat(3, 3, CV_64F, m).inv();
+ +         \endcode
+ +      </ol>
+ +
+ +       Particular yet very common instances of this "user-allocated data" case are conversions
+ +       from CvMat and IplImage to cv::Mat. For this purpose there are special constructors
+ +       taking pointers to CvMat or IplImage and the optional
+ +       flag indicating whether to copy the data or not.
+ +
+ +       Backward conversion from cv::Mat to CvMat or IplImage is provided via cast operators
+ +       cv::Mat::operator CvMat() and cv::Mat::operator IplImage().
+ +       The operators do not copy the data.
+ +
+ +
+ +       \code
+ +       IplImage* img = cvLoadImage("greatwave.jpg", 1);
+ +       Mat mtx(img); // convert IplImage* -> cv::Mat
+ +       CvMat oldmat = mtx; // convert cv::Mat -> CvMat
+ +       CV_Assert(oldmat.cols == img->width && oldmat.rows == img->height &&
+ +           oldmat.data.ptr == (uchar*)img->imageData && oldmat.step == img->widthStep);
+ +       \endcode
+ +
+ +   <li> by using MATLAB-style matrix initializers, cv::Mat::zeros(), cv::Mat::ones(), cv::Mat::eye(), e.g.:
+ +
+ +   \code
+ +   // create a double-precision identity matrix and add it to M.
+ +   M += Mat::eye(M.rows, M.cols, CV_64F);
+ +   \endcode
+ +
+ +   <li> by using comma-separated initializer:
+ +
+ +   \code
+ +   // create 3x3 double-precision identity matrix
+ +   Mat M = (Mat_<double>(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1);
+ +   \endcode
+ +
+ +   Here we first call the constructor of the cv::Mat_ class (described further below) with the proper matrix size,
+ +   and then we just put "<<" operator followed by comma-separated values that can be constants,
+ +   variables, expressions etc. Also, note the extra parentheses that are needed to avoid compiler errors.
+ +
+ +   </ul>
+ +
+ +   Once matrix is created, it will be automatically managed by using reference-counting mechanism
+ +   (unless the matrix header is built on top of user-allocated data,
+ +   in which case you should handle the data by yourself).
+ +   The matrix data will be deallocated when no one points to it;
+ +   if you want to release the data pointed by a matrix header before the matrix destructor is called,
+ +   use cv::Mat::release().
+ +
+ +   The next important thing to learn about the matrix class is element access. Here is how the matrix is stored.
+ +   The elements are stored in row-major order (row by row). The cv::Mat::data member points to the first element of the first row,
+ +   cv::Mat::rows contains the number of matrix rows and cv::Mat::cols - the number of matrix columns. There is yet another member,
+ +   cv::Mat::step that is used to actually compute address of a matrix element. cv::Mat::step is needed because the matrix can be
+ +   a part of another matrix or because there can be some padding space at the end of each row for proper alignment.
+ +
+ +   \image html roi.png
+ +
+ +   Given these parameters, the address of the matrix element M_{ij} is computed as follows:
+ +
+ +   addr(M_{ij})=M.data + M.step*i + j*M.elemSize()
+ +
+ +   if you know the matrix element type, e.g. it is float, then you can use cv::Mat::at() method:
+ +
+ +   addr(M_{ij})=&M.at<float>(i,j)
+ +
+ +   (where & is used to convert the reference returned by cv::Mat::at() to a pointer).
+ +   if you need to process a whole row of matrix, the most efficient way is to get
+ +   the pointer to the row first, and then just use plain C operator []:
+ +
+ +   \code
+ +   // compute sum of positive matrix elements
+ +   // (assuming that M is double-precision matrix)
+ +   double sum=0;
+ +   for(int i = 0; i < M.rows; i++)
+ +   {
+ +       const double* Mi = M.ptr<double>(i);
+ +       for(int j = 0; j < M.cols; j++)
+ +           sum += std::max(Mi[j], 0.);
+ +   }
+ +   \endcode
+ +
+ +   Some operations, like the above one, do not actually depend on the matrix shape,
+ +   they just process elements of a matrix one by one (or elements from multiple matrices
+ +   that are sitting in the same place, e.g. matrix addition). Such operations are called
+ +   element-wise and it makes sense to check whether all the input/output matrices are continuous,
+ +   i.e. have no gaps in the end of each row, and if yes, process them as a single long row:
+ +
+ +   \code
+ +   // compute sum of positive matrix elements, optimized variant
+ +   double sum=0;
+ +   int cols = M.cols, rows = M.rows;
+ +   if(M.isContinuous())
+ +   {
+ +       cols *= rows;
+ +       rows = 1;
+ +   }
+ +   for(int i = 0; i < rows; i++)
+ +   {
+ +       const double* Mi = M.ptr<double>(i);
+ +       for(int j = 0; j < cols; j++)
+ +           sum += std::max(Mi[j], 0.);
+ +   }
+ +   \endcode
+ +   in the case of continuous matrix the outer loop body will be executed just once,
+ +   so the overhead will be smaller, which will be especially noticeable in the case of small matrices.
+ +
+ +   Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows:
+ +   \code
+ +   // compute sum of positive matrix elements, iterator-based variant
+ +   double sum=0;
+ +   MatConstIterator_<double> it = M.begin<double>(), it_end = M.end<double>();
+ +   for(; it != it_end; ++it)
+ +       sum += std::max(*it, 0.);
+ +   \endcode
+ +
+ +   The matrix iterators are random-access iterators, so they can be passed
+ +   to any STL algorithm, including std::sort().
+ +*/
+ +class CV_EXPORTS Mat
   {
- -    CV_DbgAssert( dims >= 3 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  (unsigned)i2 < (unsigned)size.p[2] );
- -    return (_Tp*)(data + i0*step.p[0] + i1*step.p[1] + i2*step.p[2]);
- -}
- -
- -template<typename _Tp> inline const _Tp* Mat::ptr(int i0, int i1, int i2) const
- -{
- -    CV_DbgAssert( dims >= 3 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  (unsigned)i2 < (unsigned)size.p[2] );
- -    return (const _Tp*)(data + i0*step.p[0] + i1*step.p[1] + i2*step.p[2]);
- -}
- -
- -inline uchar* Mat::ptr(const int* idx)
- -{
- -    int i, d = dims;
- -    uchar* p = data;
- -    CV_DbgAssert( d >= 1 && p );
- -    for( i = 0; i < d; i++ )
+ +public:
+ +    //! default constructor
+ +    Mat();
+ +    //! constructs 2D matrix of the specified size and type
+ +    // (type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ +    Mat(int rows, int cols, int type);
+ +    Mat(Size size, int type);
+ +    //! constructs 2D matrix and fills it with the specified value s.
+ +    Mat(int rows, int cols, int type, const Scalar& s);
+ +    Mat(Size size, int type, const Scalar& s);
+ +
+ +    //! constructs n-dimensional matrix
+ +    Mat(int ndims, const int* sizes, int type);
+ +    Mat(int ndims, const int* sizes, int type, const Scalar& s);
+ +
+ +    //! copy constructor
+ +    Mat(const Mat& m);
+ +    //! constructor for matrix headers pointing to user-allocated data
+ +    Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP);
+ +    Mat(Size size, int type, void* data, size_t step=AUTO_STEP);
+ +    Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0);
+ +
+ +    //! creates a matrix header for a part of the bigger matrix
+ +    Mat(const Mat& m, const Range& rowRange, const Range& colRange=Range::all());
+ +    Mat(const Mat& m, const Rect& roi);
+ +    Mat(const Mat& m, const Range* ranges);
+ +    //! builds matrix from std::vector with or without copying the data
+ +    template<typename _Tp> explicit Mat(const std::vector<_Tp>& vec, bool copyData=false);
+ +    //! builds matrix from cv::Vec; the data is copied by default
+ +    template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true);
+ +    //! builds matrix from cv::Matx; the data is copied by default
+ +    template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+ +    //! builds matrix from a 2D point
+ +    template<typename _Tp> explicit Mat(const Point_<_Tp>& pt, bool copyData=true);
+ +    //! builds matrix from a 3D point
+ +    template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
+ +    //! builds matrix from comma initializer
+ +    template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
+ +
+ +    // //! converts old-style CvMat to the new matrix; the data is not copied by default
+ +    // Mat(const CvMat* m, bool copyData=false);
+ +    // //! converts old-style CvMatND to the new matrix; the data is not copied by default
+ +    // Mat(const CvMatND* m, bool copyData=false);
+ +    // //! converts old-style IplImage to the new matrix; the data is not copied by default
+ +    // Mat(const IplImage* img, bool copyData=false);
+ +    //Mat(const void* img, bool copyData=false);
+ +
+ +    //! download data from GpuMat
+ +    explicit Mat(const gpu::GpuMat& m);
+ +
+ +    //! destructor - calls release()
+ +    ~Mat();
+ +    //! assignment operators
+ +    Mat& operator = (const Mat& m);
+ +    Mat& operator = (const MatExpr& expr);
+ +
+ +    //! returns a new matrix header for the specified row
+ +    Mat row(int y) const;
+ +    //! returns a new matrix header for the specified column
+ +    Mat col(int x) const;
+ +    //! ... for the specified row span
+ +    Mat rowRange(int startrow, int endrow) const;
+ +    Mat rowRange(const Range& r) const;
+ +    //! ... for the specified column span
+ +    Mat colRange(int startcol, int endcol) const;
+ +    Mat colRange(const Range& r) const;
+ +    //! ... for the specified diagonal
+ +    // (d=0 - the main diagonal,
+ +    //  >0 - a diagonal from the upper half,
+ +    //  <0 - a diagonal from the lower half)
+ +    Mat diag(int d=0) const;
+ +    //! constructs a square diagonal matrix whose main diagonal is vector "d"
+ +    static Mat diag(const Mat& d);
+ +
+ +    //! returns deep copy of the matrix, i.e. the data is copied
+ +    Mat clone() const;
+ +    //! copies the matrix content to "m".
+ +    // It calls m.create(this->size(), this->type()).
+ +    void copyTo( OutputArray m ) const;
+ +    //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+ +    void copyTo( OutputArray m, InputArray mask ) const;
+ +    //! converts matrix to another datatype with optional scaling. See cvConvertScale.
+ +    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+ +
+ +    void assignTo( Mat& m, int type=-1 ) const;
+ +
+ +    //! sets every matrix element to s
+ +    Mat& operator = (const Scalar& s);
+ +    //! sets some of the matrix elements to s, according to the mask
+ +    Mat& setTo(InputArray value, InputArray mask=noArray());
+ +    //! creates alternative matrix header for the same data, with different
+ +    // number of channels and/or different number of rows. see cvReshape.
+ +    Mat reshape(int cn, int rows=0) const;
+ +    Mat reshape(int cn, int newndims, const int* newsz) const;
+ +
+ +    //! matrix transposition by means of matrix expressions
+ +    MatExpr t() const;
+ +    //! matrix inversion by means of matrix expressions
+ +    MatExpr inv(int method=DECOMP_LU) const;
+ +    //! per-element matrix multiplication by means of matrix expressions
+ +    MatExpr mul(InputArray m, double scale=1) const;
+ +
+ +    //! computes cross-product of 2 3D vectors
+ +    Mat cross(InputArray m) const;
+ +    //! computes dot-product
+ +    double dot(InputArray m) const;
+ +
+ +    //! Matlab-style matrix initialization
+ +    static MatExpr zeros(int rows, int cols, int type);
+ +    static MatExpr zeros(Size size, int type);
+ +    static MatExpr zeros(int ndims, const int* sz, int type);
+ +    static MatExpr ones(int rows, int cols, int type);
+ +    static MatExpr ones(Size size, int type);
+ +    static MatExpr ones(int ndims, const int* sz, int type);
+ +    static MatExpr eye(int rows, int cols, int type);
+ +    static MatExpr eye(Size size, int type);
+ +
+ +    //! allocates new matrix data unless the matrix already has specified size and type.
+ +    // previous data is unreferenced if needed.
+ +    void create(int rows, int cols, int type);
+ +    void create(Size size, int type);
+ +    void create(int ndims, const int* sizes, int type);
+ +
+ +    //! increases the reference counter; use with care to avoid memleaks
+ +    void addref();
+ +    //! decreases reference counter;
+ +    // deallocates the data when reference counter reaches 0.
+ +    void release();
+ +
+ +    //! deallocates the matrix data
+ +    void deallocate();
+ +    //! internal use function; properly re-allocates _size, _step arrays
+ +    void copySize(const Mat& m);
+ +
+ +    //! reserves enough space to fit sz hyper-planes
+ +    void reserve(size_t sz);
+ +    //! resizes matrix to the specified number of hyper-planes
+ +    void resize(size_t sz);
+ +    //! resizes matrix to the specified number of hyper-planes; initializes the newly added elements
+ +    void resize(size_t sz, const Scalar& s);
+ +    //! internal function
+ +    void push_back_(const void* elem);
+ +    //! adds element to the end of 1d matrix (or possibly multiple elements when _Tp=Mat)
+ +    template<typename _Tp> void push_back(const _Tp& elem);
+ +    template<typename _Tp> void push_back(const Mat_<_Tp>& elem);
+ +    void push_back(const Mat& m);
+ +    //! removes several hyper-planes from bottom of the matrix
+ +    void pop_back(size_t nelems=1);
+ +
+ +    //! locates matrix header within a parent matrix. See below
+ +    void locateROI( Size& wholeSize, Point& ofs ) const;
+ +    //! moves/resizes the current matrix ROI inside the parent matrix.
+ +    Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ +    //! extracts a rectangular sub-matrix
+ +    // (this is a generalized form of row, rowRange etc.)
+ +    Mat operator()( Range rowRange, Range colRange ) const;
+ +    Mat operator()( const Rect& roi ) const;
+ +    Mat operator()( const Range* ranges ) const;
+ +
+ +    // //! converts header to CvMat; no data is copied
+ +    // operator CvMat() const;
+ +    // //! converts header to CvMatND; no data is copied
+ +    // operator CvMatND() const;
+ +    // //! converts header to IplImage; no data is copied
+ +    // operator IplImage() const;
+ +
+ +    template<typename _Tp> operator std::vector<_Tp>() const;
+ +    template<typename _Tp, int n> operator Vec<_Tp, n>() const;
+ +    template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
+ +
+ +    //! returns true iff the matrix data is continuous
+ +    // (i.e. when there are no gaps between successive rows).
+ +    // similar to CV_IS_MAT_CONT(cvmat->type)
+ +    bool isContinuous() const;
+ +
+ +    //! returns true if the matrix is a submatrix of another matrix
+ +    bool isSubmatrix() const;
+ +
+ +    //! returns element size in bytes,
+ +    // similar to CV_ELEM_SIZE(cvmat->type)
+ +    size_t elemSize() const;
+ +    //! returns the size of element channel in bytes.
+ +    size_t elemSize1() const;
+ +    //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+ +    int type() const;
+ +    //! returns element depth, similar to CV_MAT_DEPTH(cvmat->type)
+ +    int depth() const;
+ +    //! returns the number of channels, similar to CV_MAT_CN(cvmat->type)
+ +    int channels() const;
+ +    //! returns step/elemSize1()
+ +    size_t step1(int i=0) const;
+ +    //! returns true if matrix data is NULL
+ +    bool empty() const;
+ +    //! returns the total number of matrix elements
+ +    size_t total() const;
+ +
+ +    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+ +    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+ +
+ +    //! returns pointer to i0-th submatrix along the dimension #0
+ +    uchar* ptr(int i0=0);
+ +    const uchar* ptr(int i0=0) const;
+ +
+ +    //! returns pointer to (i0,i1) submatrix along the dimensions #0 and #1
+ +    uchar* ptr(int i0, int i1);
+ +    const uchar* ptr(int i0, int i1) const;
+ +
+ +    //! returns pointer to (i0,i1,i2) submatrix along the dimensions #0, #1, #2
+ +    uchar* ptr(int i0, int i1, int i2);
+ +    const uchar* ptr(int i0, int i1, int i2) const;
+ +
+ +    //! returns pointer to the matrix element
+ +    uchar* ptr(const int* idx);
+ +    //! returns read-only pointer to the matrix element
+ +    const uchar* ptr(const int* idx) const;
+ +
+ +    template<int n> uchar* ptr(const Vec<int, n>& idx);
+ +    template<int n> const uchar* ptr(const Vec<int, n>& idx) const;
+ +
+ +    //! template version of the above method
+ +    template<typename _Tp> _Tp* ptr(int i0=0);
+ +    template<typename _Tp> const _Tp* ptr(int i0=0) const;
+ +
+ +    template<typename _Tp> _Tp* ptr(int i0, int i1);
+ +    template<typename _Tp> const _Tp* ptr(int i0, int i1) const;
+ +
+ +    template<typename _Tp> _Tp* ptr(int i0, int i1, int i2);
+ +    template<typename _Tp> const _Tp* ptr(int i0, int i1, int i2) const;
+ +
+ +    template<typename _Tp> _Tp* ptr(const int* idx);
+ +    template<typename _Tp> const _Tp* ptr(const int* idx) const;
+ +
+ +    template<typename _Tp, int n> _Tp* ptr(const Vec<int, n>& idx);
+ +    template<typename _Tp, int n> const _Tp* ptr(const Vec<int, n>& idx) const;
+ +
+ +    //! the same as above, with the pointer dereferencing
+ +    template<typename _Tp> _Tp& at(int i0=0);
+ +    template<typename _Tp> const _Tp& at(int i0=0) const;
+ +
+ +    template<typename _Tp> _Tp& at(int i0, int i1);
+ +    template<typename _Tp> const _Tp& at(int i0, int i1) const;
+ +
+ +    template<typename _Tp> _Tp& at(int i0, int i1, int i2);
+ +    template<typename _Tp> const _Tp& at(int i0, int i1, int i2) const;
+ +
+ +    template<typename _Tp> _Tp& at(const int* idx);
+ +    template<typename _Tp> const _Tp& at(const int* idx) const;
+ +
+ +    template<typename _Tp, int n> _Tp& at(const Vec<int, n>& idx);
+ +    template<typename _Tp, int n> const _Tp& at(const Vec<int, n>& idx) const;
+ +
+ +    //! special versions for 2D arrays (especially convenient for referencing image pixels)
+ +    template<typename _Tp> _Tp& at(Point pt);
+ +    template<typename _Tp> const _Tp& at(Point pt) const;
+ +
+ +    //! template methods for iteration over matrix elements.
+ +    // the iterators take care of skipping gaps in the end of rows (if any)
+ +    template<typename _Tp> MatIterator_<_Tp> begin();
+ +    template<typename _Tp> MatIterator_<_Tp> end();
+ +    template<typename _Tp> MatConstIterator_<_Tp> begin() const;
+ +    template<typename _Tp> MatConstIterator_<_Tp> end() const;
+ +
+ +    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+ +    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+ +
+ +    /*! includes several bit-fields:
+ +         - the magic signature
+ +         - continuity flag
+ +         - depth
+ +         - number of channels
+ +     */
+ +    int flags;
+ +    //! the matrix dimensionality, >= 2
+ +    int dims;
+ +    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+ +    int rows, cols;
+ +    //! pointer to the data
+ +    uchar* data;
+ +
+ +    //! pointer to the reference counter;
+ +    // when matrix points to user-allocated data, the pointer is NULL
+ +    int* refcount;
+ +
+ +    //! helper fields used in locateROI and adjustROI
+ +    uchar* datastart;
+ +    uchar* dataend;
+ +    uchar* datalimit;
+ +
+ +    //! custom allocator
+ +    MatAllocator* allocator;
+ +
+ +    struct CV_EXPORTS MSize
       {
- -        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
- -        p += idx[i]*step.p[i];
- -    }
- -    return p;
- -}
- -
- -inline const uchar* Mat::ptr(const int* idx) const
- -{
- -    int i, d = dims;
- -    uchar* p = data;
- -    CV_DbgAssert( d >= 1 && p );
- -    for( i = 0; i < d; i++ )
+ +        MSize(int* _p);
+ +        Size operator()() const;
+ +        const int& operator[](int i) const;
+ +        int& operator[](int i);
+ +        operator const int*() const;
+ +        bool operator == (const MSize& sz) const;
+ +        bool operator != (const MSize& sz) const;
+ +
+ +        int* p;
+ +    };
+ +
+ +    struct CV_EXPORTS MStep
       {
- -        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
- -        p += idx[i]*step.p[i];
- -    }
- -    return p;
- -}
- -
- -template<typename _Tp> inline _Tp& Mat::at(int i0, int i1)
- -{
- -    CV_DbgAssert( dims <= 2 && data && (unsigned)i0 < (unsigned)size.p[0] &&
- -        (unsigned)(i1*DataType<_Tp>::channels) < (unsigned)(size.p[1]*channels()) &&
- -        CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
- -    return ((_Tp*)(data + step.p[0]*i0))[i1];
- -}
- -
- -template<typename _Tp> inline const _Tp& Mat::at(int i0, int i1) const
- -{
- -    CV_DbgAssert( dims <= 2 && data && (unsigned)i0 < (unsigned)size.p[0] &&
- -        (unsigned)(i1*DataType<_Tp>::channels) < (unsigned)(size.p[1]*channels()) &&
- -        CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
- -    return ((const _Tp*)(data + step.p[0]*i0))[i1];
- -}
- -
- -template<typename _Tp> inline _Tp& Mat::at(Point pt)
- -{
- -    CV_DbgAssert( dims <= 2 && data && (unsigned)pt.y < (unsigned)size.p[0] &&
- -        (unsigned)(pt.x*DataType<_Tp>::channels) < (unsigned)(size.p[1]*channels()) &&
- -        CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
- -    return ((_Tp*)(data + step.p[0]*pt.y))[pt.x];
- -}
- -
- -template<typename _Tp> inline const _Tp& Mat::at(Point pt) const
- -{
- -    CV_DbgAssert( dims <= 2 && data && (unsigned)pt.y < (unsigned)size.p[0] &&
- -        (unsigned)(pt.x*DataType<_Tp>::channels) < (unsigned)(size.p[1]*channels()) &&
- -        CV_ELEM_SIZE1(DataType<_Tp>::depth) == elemSize1());
- -    return ((const _Tp*)(data + step.p[0]*pt.y))[pt.x];
- -}
- -
- -template<typename _Tp> inline _Tp& Mat::at(int i0)
- -{
- -    CV_DbgAssert( dims <= 2 && data &&
- -                 (unsigned)i0 < (unsigned)(size.p[0]*size.p[1]) &&
- -                 elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    if( isContinuous() || size.p[0] == 1 )
- -        return ((_Tp*)data)[i0];
- -    if( size.p[1] == 1 )
- -        return *(_Tp*)(data + step.p[0]*i0);
- -    int i = i0/cols, j = i0 - i*cols;
- -    return ((_Tp*)(data + step.p[0]*i))[j];
- -}
- -
- -template<typename _Tp> inline const _Tp& Mat::at(int i0) const
- -{
- -    CV_DbgAssert( dims <= 2 && data &&
- -                 (unsigned)i0 < (unsigned)(size.p[0]*size.p[1]) &&
- -                 elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    if( isContinuous() || size.p[0] == 1 )
- -        return ((const _Tp*)data)[i0];
- -    if( size.p[1] == 1 )
- -        return *(const _Tp*)(data + step.p[0]*i0);
- -    int i = i0/cols, j = i0 - i*cols;
- -    return ((const _Tp*)(data + step.p[0]*i))[j];
- -}
- -
- -template<typename _Tp> inline _Tp& Mat::at(int i0, int i1, int i2)
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(_Tp*)ptr(i0, i1, i2);
- -}
- -template<typename _Tp> inline const _Tp& Mat::at(int i0, int i1, int i2) const
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(const _Tp*)ptr(i0, i1, i2);
- -}
- -template<typename _Tp> inline _Tp& Mat::at(const int* idx)
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(_Tp*)ptr(idx);
- -}
- -template<typename _Tp> inline const _Tp& Mat::at(const int* idx) const
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(const _Tp*)ptr(idx);
- -}
- -template<typename _Tp, int n> _Tp& Mat::at(const Vec<int, n>& idx)
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(_Tp*)ptr(idx.val);
- -}
- -template<typename _Tp, int n> inline const _Tp& Mat::at(const Vec<int, n>& idx) const
- -{
- -    CV_DbgAssert( elemSize() == CV_ELEM_SIZE(DataType<_Tp>::type) );
- -    return *(const _Tp*)ptr(idx.val);
- -}
- -
- -
- -template<typename _Tp> inline MatConstIterator_<_Tp> Mat::begin() const
- -{
- -    CV_DbgAssert( elemSize() == sizeof(_Tp) );
- -    return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
- -}
- -
- -template<typename _Tp> inline MatConstIterator_<_Tp> Mat::end() const
- -{
- -    CV_DbgAssert( elemSize() == sizeof(_Tp) );
- -    MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
- -    it += total();
- -    return it;
- -}
- -
- -template<typename _Tp> inline MatIterator_<_Tp> Mat::begin()
- -{
- -    CV_DbgAssert( elemSize() == sizeof(_Tp) );
- -    return MatIterator_<_Tp>((Mat_<_Tp>*)this);
- -}
+ +        MStep();
+ +        MStep(size_t s);
+ +        const size_t& operator[](int i) const;
+ +        size_t& operator[](int i);
+ +        operator size_t() const;
+ +        MStep& operator = (size_t s);
+ +
+ +        size_t* p;
+ +        size_t buf[2];
+ +    protected:
+ +        MStep& operator = (const MStep&);
+ +    };
+ +
+ +    MSize size;
+ +    MStep step;
+ +
+ +protected:
+ +};
   
- -template<typename _Tp> inline MatIterator_<_Tp> Mat::end()
- -{
- -    CV_DbgAssert( elemSize() == sizeof(_Tp) );
- -    MatIterator_<_Tp> it((Mat_<_Tp>*)this);
- -    it += total();
- -    return it;
- -}
   
- -template<typename _Tp> inline Mat::operator vector<_Tp>() const
- -{
- -    vector<_Tp> v;
- -    copyTo(v);
- -    return v;
- -}
+ +///////////////////////////////// Mat_<_Tp> ////////////////////////////////////
   
- -template<typename _Tp, int n> inline Mat::operator Vec<_Tp, n>() const
+ +/*!
+ + Template matrix class derived from Mat
+ +
+ + The class Mat_ is a "thin" template wrapper on top of cv::Mat. It does not have any extra data fields,
+ + and neither it nor cv::Mat has any virtual methods, thus references or pointers to these two classes
+ + can be safely converted one to another. But do it with care, for example:
+ +
+ + \code
+ + // create 100x100 8-bit matrix
+ + Mat M(100,100,CV_8U);
+ + // this will compile fine. no data conversion will be done.
+ + Mat_<float>& M1 = (Mat_<float>&)M;
+ + // the program will likely crash at the statement below
+ + M1(99,99) = 1.f;
+ + \endcode
+ +
+ + While cv::Mat is sufficient in most cases, cv::Mat_ can be more convenient if you use a lot of element
+ + access operations and if you know matrix type at compile time.
+ + Note that cv::Mat::at<_Tp>(int y, int x) and cv::Mat_<_Tp>::operator ()(int y, int x) do absolutely the
+ + same thing and run at the same speed, but the latter is certainly shorter:
+ +
+ + \code
+ + Mat_<double> M(20,20);
+ + for(int i = 0; i < M.rows; i++)
+ +    for(int j = 0; j < M.cols; j++)
+ +       M(i,j) = 1./(i+j+1);
+ + Mat E, V;
+ + eigen(M,E,V);
+ + cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
+ + \endcode
+ +
+ + It is easy to use Mat_ for multi-channel images/matrices - just pass cv::Vec as cv::Mat_ template parameter:
+ +
+ + \code
+ + // allocate 320x240 color image and fill it with green (in RGB space)
+ + Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
+ + // now draw a diagonal white line
+ + for(int i = 0; i < 100; i++)
+ +     img(i,i)=Vec3b(255,255,255);
+ + // and now modify the 3rd channel (index 2; red if the image is in BGR order) of each pixel
+ + for(int i = 0; i < img.rows; i++)
+ +    for(int j = 0; j < img.cols; j++)
+ +       img(i,j)[2] ^= (uchar)(i ^ j); // img(y,x)[c] accesses c-th channel of the pixel (x,y)
+ + \endcode
+ +*/
+ +template<typename _Tp> class CV_EXPORTS Mat_ : public Mat
   {
- -    CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) &&
- -               rows + cols - 1 == n && channels() == 1 );
- -
- -    if( isContinuous() && type() == DataType<_Tp>::type )
- -        return Vec<_Tp, n>((_Tp*)data);
- -    Vec<_Tp, n> v; Mat tmp(rows, cols, DataType<_Tp>::type, v.val);
- -    convertTo(tmp, tmp.type());
- -    return v;
- -}
+ +public:
+ +    typedef _Tp value_type;
+ +    typedef typename DataType<_Tp>::channel_type channel_type;
+ +    typedef MatIterator_<_Tp> iterator;
+ +    typedef MatConstIterator_<_Tp> const_iterator;
+ +
+ +    //! default constructor
+ +    Mat_();
+ +    //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type)
+ +    Mat_(int _rows, int _cols);
+ +    //! constructor that sets each matrix element to specified value
+ +    Mat_(int _rows, int _cols, const _Tp& value);
+ +    //! equivalent to Mat(_size, DataType<_Tp>::type)
+ +    explicit Mat_(Size _size);
+ +    //! constructor that sets each matrix element to specified value
+ +    Mat_(Size _size, const _Tp& value);
+ +    //! n-dim array constructor
+ +    Mat_(int _ndims, const int* _sizes);
+ +    //! n-dim array constructor that sets each matrix element to specified value
+ +    Mat_(int _ndims, const int* _sizes, const _Tp& value);
+ +    //! copy/conversion constructor. If m is of different type, it's converted
+ +    Mat_(const Mat& m);
+ +    //! copy constructor
+ +    Mat_(const Mat_& m);
+ +    //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
+ +    Mat_(int _rows, int _cols, _Tp* _data, size_t _step=AUTO_STEP);
+ +    //! constructs n-dim matrix on top of user-allocated data. steps are in bytes(!!!), regardless of the type
+ +    Mat_(int _ndims, const int* _sizes, _Tp* _data, const size_t* _steps=0);
+ +    //! selects a submatrix
+ +    Mat_(const Mat_& m, const Range& rowRange, const Range& colRange=Range::all());
+ +    //! selects a submatrix
+ +    Mat_(const Mat_& m, const Rect& roi);
+ +    //! selects a submatrix, n-dim version
+ +    Mat_(const Mat_& m, const Range* ranges);
+ +    //! from a matrix expression
+ +    explicit Mat_(const MatExpr& e);
+ +    //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column
+ +    explicit Mat_(const std::vector<_Tp>& vec, bool copyData=false);
+ +    template<int n> explicit Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData=true);
+ +    template<int m, int n> explicit Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& mtx, bool copyData=true);
+ +    explicit Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+ +    explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+ +    explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
+ +
+ +    Mat_& operator = (const Mat& m);
+ +    Mat_& operator = (const Mat_& m);
+ +    //! set all the elements to s.
+ +    Mat_& operator = (const _Tp& s);
+ +    //! assign a matrix expression
+ +    Mat_& operator = (const MatExpr& e);
+ +
+ +    //! iterators; they are smart enough to skip gaps in the end of rows
+ +    iterator begin();
+ +    iterator end();
+ +    const_iterator begin() const;
+ +    const_iterator end() const;
+ +
+ +    //! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
+ +    void create(int _rows, int _cols);
+ +    //! equivalent to Mat::create(_size, DataType<_Tp>::type)
+ +    void create(Size _size);
+ +    //! equivalent to Mat::create(_ndims, _sizes, DataType<_Tp>::type)
+ +    void create(int _ndims, const int* _sizes);
+ +    //! cross-product
+ +    Mat_ cross(const Mat_& m) const;
+ +    //! data type conversion
+ +    template<typename T2> operator Mat_<T2>() const;
+ +    //! overridden forms of Mat::row() etc.
+ +    Mat_ row(int y) const;
+ +    Mat_ col(int x) const;
+ +    Mat_ diag(int d=0) const;
+ +    Mat_ clone() const;
+ +
+ +    //! overridden forms of Mat::elemSize() etc.
+ +    size_t elemSize() const;
+ +    size_t elemSize1() const;
+ +    int type() const;
+ +    int depth() const;
+ +    int channels() const;
+ +    size_t step1(int i=0) const;
+ +    //! returns step()/sizeof(_Tp)
+ +    size_t stepT(int i=0) const;
+ +
+ +    //! overridden forms of Mat::zeros() etc. Data type is omitted, of course
+ +    static MatExpr zeros(int rows, int cols);
+ +    static MatExpr zeros(Size size);
+ +    static MatExpr zeros(int _ndims, const int* _sizes);
+ +    static MatExpr ones(int rows, int cols);
+ +    static MatExpr ones(Size size);
+ +    static MatExpr ones(int _ndims, const int* _sizes);
+ +    static MatExpr eye(int rows, int cols);
+ +    static MatExpr eye(Size size);
+ +
+ +    //! some more overridden methods
+ +    Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ +    Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
+ +    Mat_ operator()( const Rect& roi ) const;
+ +    Mat_ operator()( const Range* ranges ) const;
+ +
+ +    //! more convenient forms of row and element access operators
+ +    _Tp* operator [](int y);
+ +    const _Tp* operator [](int y) const;
+ +
+ +    //! returns reference to the specified element
+ +    _Tp& operator ()(const int* idx);
+ +    //! returns read-only reference to the specified element
+ +    const _Tp& operator ()(const int* idx) const;
+ +
+ +    //! returns reference to the specified element
+ +    template<int n> _Tp& operator ()(const Vec<int, n>& idx);
+ +    //! returns read-only reference to the specified element
+ +    template<int n> const _Tp& operator ()(const Vec<int, n>& idx) const;
+ +
+ +    //! returns reference to the specified element (1D case)
+ +    _Tp& operator ()(int idx0);
+ +    //! returns read-only reference to the specified element (1D case)
+ +    const _Tp& operator ()(int idx0) const;
+ +    //! returns reference to the specified element (2D case)
+ +    _Tp& operator ()(int idx0, int idx1);
+ +    //! returns read-only reference to the specified element (2D case)
+ +    const _Tp& operator ()(int idx0, int idx1) const;
+ +    //! returns reference to the specified element (3D case)
+ +    _Tp& operator ()(int idx0, int idx1, int idx2);
+ +    //! returns read-only reference to the specified element (3D case)
+ +    const _Tp& operator ()(int idx0, int idx1, int idx2) const;
+ +
+ +    _Tp& operator ()(Point pt);
+ +    const _Tp& operator ()(Point pt) const;
+ +
+ +    //! conversion to vector.
+ +    operator std::vector<_Tp>() const;
+ +    //! conversion to Vec
+ +    template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
+ +    //! conversion to Matx
+ +    template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
+ +};
   
- -template<typename _Tp, int m, int n> inline Mat::operator Matx<_Tp, m, n>() const
+ +typedef Mat_<uchar> Mat1b;
+ +typedef Mat_<Vec2b> Mat2b;
+ +typedef Mat_<Vec3b> Mat3b;
+ +typedef Mat_<Vec4b> Mat4b;
+ +
+ +typedef Mat_<short> Mat1s;
+ +typedef Mat_<Vec2s> Mat2s;
+ +typedef Mat_<Vec3s> Mat3s;
+ +typedef Mat_<Vec4s> Mat4s;
+ +
+ +typedef Mat_<ushort> Mat1w;
+ +typedef Mat_<Vec2w> Mat2w;
+ +typedef Mat_<Vec3w> Mat3w;
+ +typedef Mat_<Vec4w> Mat4w;
+ +
+ +typedef Mat_<int>   Mat1i;
+ +typedef Mat_<Vec2i> Mat2i;
+ +typedef Mat_<Vec3i> Mat3i;
+ +typedef Mat_<Vec4i> Mat4i;
+ +
+ +typedef Mat_<float> Mat1f;
+ +typedef Mat_<Vec2f> Mat2f;
+ +typedef Mat_<Vec3f> Mat3f;
+ +typedef Mat_<Vec4f> Mat4f;
+ +
+ +typedef Mat_<double> Mat1d;
+ +typedef Mat_<Vec2d> Mat2d;
+ +typedef Mat_<Vec3d> Mat3d;
+ +typedef Mat_<Vec4d> Mat4d;
+ +
+ +
+ +
+ +/////////////////////////// multi-dimensional sparse matrix //////////////////////////
+ +
+ +/*!
+ + Sparse matrix class.
+ +
+ + The class represents multi-dimensional sparse numerical arrays. Such a sparse array can store elements
+ + of any type that cv::Mat is able to store. "Sparse" means that only non-zero elements
+ + are stored (though, as a result of some operations on a sparse matrix, some of its stored elements
+ + can actually become 0; it is the user's responsibility to detect such elements and delete them using cv::SparseMat::erase()).
+ + The non-zero elements are stored in a hash table that grows when it's filled enough,
+ + so that the search time remains O(1) on average. Elements can be accessed using the following methods:
+ +
+ + <ol>
+ + <li>Query operations: cv::SparseMat::ptr() and the higher-level cv::SparseMat::ref(),
+ +      cv::SparseMat::value() and cv::SparseMat::find, for example:
+ + \code
+ + const int dims = 5;
+ + int size[] = {10, 10, 10, 10, 10};
+ + SparseMat sparse_mat(dims, size, CV_32F);
+ + for(int i = 0; i < 1000; i++)
+ + {
+ +     int idx[dims];
+ +     for(int k = 0; k < dims; k++)
+ +        idx[k] = rand()%sparse_mat.size(k);
+ +     sparse_mat.ref<float>(idx) += 1.f;
+ + }
+ + \endcode
+ +
+ + <li>Sparse matrix iterators. Like cv::Mat iterators, the sparse matrix iterators are STL-style,
+ + that is, the iteration is done as follows:
+ + \code
+ + // prints elements of a sparse floating-point matrix and the sum of elements.
+ + SparseMatConstIterator_<float>
+ +        it = sparse_mat.begin<float>(),
+ +        it_end = sparse_mat.end<float>();
+ + double s = 0;
+ + int dims = sparse_mat.dims();
+ + for(; it != it_end; ++it)
+ + {
+ +     // print element indices and the element value
+ +     const SparseMat::Node* n = it.node();
+ +     printf("(");
+ +     for(int i = 0; i < dims; i++)
+ +        printf("%3d%c", n->idx[i], i < dims-1 ? ',' : ')');
+ +     printf(": %f\n", *it);
+ +     s += *it;
+ + }
+ + printf("Element sum is %g\n", s);
+ + \endcode
+ + If you run this loop, you will notice that the elements are not enumerated
+ + in any logical order (lexicographical, etc.);
+ + they come in the same order as they are stored in the hash table, i.e. semi-randomly.
+ +
+ + You may collect pointers to the nodes and sort them to get the proper ordering.
+ + Note, however, that pointers to the nodes may become invalid when you add more
+ + elements to the matrix; this is because of possible buffer reallocation.
+ +
+ + <li>A combination of the above 2 methods when you need to process 2 or more sparse
+ + matrices simultaneously, e.g. this is how you can compute unnormalized
+ + cross-correlation of the 2 floating-point sparse matrices:
+ + \code
+ + double crossCorr(const SparseMat& a, const SparseMat& b)
+ + {
+ +     const SparseMat *_a = &a, *_b = &b;
+ +     // if b contains fewer elements than a,
+ +     // it's faster to iterate through b
+ +     if(_a->nzcount() > _b->nzcount())
+ +        std::swap(_a, _b);
+ +     SparseMatConstIterator_<float> it = _a->begin<float>(),
+ +                                    it_end = _a->end<float>();
+ +     double ccorr = 0;
+ +     for(; it != it_end; ++it)
+ +     {
+ +         // take the next element from the first matrix
+ +         float avalue = *it;
+ +         const SparseMat::Node* anode = it.node();
+ +         // and try to find element with the same index in the second matrix.
+ +         // since the hash value depends only on the element index,
+ +         // we reuse hashvalue stored in the node
+ +         float bvalue = _b->value<float>(anode->idx,&anode->hashval);
+ +         ccorr += avalue*bvalue;
+ +     }
+ +     return ccorr;
+ + }
+ + \endcode
+ + </ol>
+ +*/
+ +class CV_EXPORTS SparseMat
   {
- -    CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 );
- -
- -    if( isContinuous() && type() == DataType<_Tp>::type )
- -        return Matx<_Tp, m, n>((_Tp*)data);
- -    Matx<_Tp, m, n> mtx; Mat tmp(rows, cols, DataType<_Tp>::type, mtx.val);
- -    convertTo(tmp, tmp.type());
- -    return mtx;
- -}
+ +public:
+ +    typedef SparseMatIterator iterator;
+ +    typedef SparseMatConstIterator const_iterator;
   
+ +    enum { MAGIC_VAL=0x42FD0000, MAX_DIM=32, HASH_SCALE=0x5bd1e995, HASH_BIT=0x80000000 };
   
- -template<typename _Tp> inline void Mat::push_back(const _Tp& elem)
- -{
- -    if( !data )
+ +    //! the sparse matrix header
+ +    struct CV_EXPORTS Hdr
       {
- -        *this = Mat(1, 1, DataType<_Tp>::type, (void*)&elem).clone();
- -        return;
- -    }
- -    CV_Assert(DataType<_Tp>::type == type() && cols == 1
- -              /* && dims == 2 (cols == 1 implies dims == 2) */);
- -    uchar* tmp = dataend + step[0];
- -    if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
+ +        Hdr(int _dims, const int* _sizes, int _type);
+ +        void clear();
+ +        int refcount;
+ +        int dims;
+ +        int valueOffset;
+ +        size_t nodeSize;
+ +        size_t nodeCount;
+ +        size_t freeList;
+ +        std::vector<uchar> pool;
+ +        std::vector<size_t> hashtab;
+ +        int size[MAX_DIM];
+ +    };
+ +
+ +    //! sparse matrix node - element of a hash table
+ +    struct CV_EXPORTS Node
       {
- -        *(_Tp*)(data + (size.p[0]++)*step.p[0]) = elem;
- -        dataend = tmp;
- -    }
- -    else
- -        push_back_(&elem);
- -}
- -
- -template<typename _Tp> inline void Mat::push_back(const Mat_<_Tp>& m)
- -{
- -    push_back((const Mat&)m);
- -}
- -
- -inline Mat::MSize::MSize(int* _p) : p(_p) {}
- -inline Size Mat::MSize::operator()() const
- -{
- -    CV_DbgAssert(p[-1] <= 2);
- -    return Size(p[1], p[0]);
- -}
- -inline const int& Mat::MSize::operator[](int i) const { return p[i]; }
- -inline int& Mat::MSize::operator[](int i) { return p[i]; }
- -inline Mat::MSize::operator const int*() const { return p; }
- -
- -inline bool Mat::MSize::operator == (const MSize& sz) const
- -{
- -    int d = p[-1], dsz = sz.p[-1];
- -    if( d != dsz )
- -        return false;
- -    if( d == 2 )
- -        return p[0] == sz.p[0] && p[1] == sz.p[1];
- -
- -    for( int i = 0; i < d; i++ )
- -        if( p[i] != sz.p[i] )
- -            return false;
- -    return true;
- -}
- -
- -inline bool Mat::MSize::operator != (const MSize& sz) const
- -{
- -    return !(*this == sz);
- -}
- -
- -inline Mat::MStep::MStep() { p = buf; p[0] = p[1] = 0; }
- -inline Mat::MStep::MStep(size_t s) { p = buf; p[0] = s; p[1] = 0; }
- -inline const size_t& Mat::MStep::operator[](int i) const { return p[i]; }
- -inline size_t& Mat::MStep::operator[](int i) { return p[i]; }
- -inline Mat::MStep::operator size_t() const
- -{
- -    CV_DbgAssert( p == buf );
- -    return buf[0];
- -}
- -inline Mat::MStep& Mat::MStep::operator = (size_t s)
- -{
- -    CV_DbgAssert( p == buf );
- -    buf[0] = s;
- -    return *this;
- -}
- -
- -static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0)
- -{
- -    return cvarrToMat(arr, copyData, true, coiMode);
- -}
- -
- -///////////////////////////////////////////// SVD //////////////////////////////////////////////////////
- -
- -inline SVD::SVD() {}
- -inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); }
- -inline void SVD::solveZ( InputArray m, OutputArray _dst )
- -{
- -    Mat mtx = m.getMat();
- -    SVD svd(mtx, (mtx.rows >= mtx.cols ? 0 : SVD::FULL_UV));
- -    _dst.create(svd.vt.cols, 1, svd.vt.type());
- -    Mat dst = _dst.getMat();
- -    svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst);
- -}
- -
- -template<typename _Tp, int m, int n, int nm> inline void
- -    SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
- -{
- -    assert( nm == MIN(m, n));
- -    Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
- -    SVD::compute(_a, _w, _u, _vt);
- -    CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
- -}
- -
- -template<typename _Tp, int m, int n, int nm> inline void
- -SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
- -{
- -    assert( nm == MIN(m, n));
- -    Mat _a(a, false), _w(w, false);
- -    SVD::compute(_a, _w);
- -    CV_Assert(_w.data == (uchar*)&w.val[0]);
- -}
- -
- -template<typename _Tp, int m, int n, int nm, int nb> inline void
- -SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
- -                const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
- -                Matx<_Tp, n, nb>& dst )
- -{
- -    assert( nm == MIN(m, n));
- -    Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
- -    SVD::backSubst(_w, _u, _vt, _rhs, _dst);
- -    CV_Assert(_dst.data == (uchar*)&dst.val[0]);
- -}
- -
- -///////////////////////////////// Mat_<_Tp> ////////////////////////////////////
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_()
- -    : Mat() { flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; }
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(int _rows, int _cols)
- -    : Mat(_rows, _cols, DataType<_Tp>::type) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value)
- -    : Mat(_rows, _cols, DataType<_Tp>::type) { *this = value; }
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(Size _sz)
- -    : Mat(_sz.height, _sz.width, DataType<_Tp>::type) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(Size _sz, const _Tp& value)
- -    : Mat(_sz.height, _sz.width, DataType<_Tp>::type) { *this = value; }
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(int _dims, const int* _sz)
- -    : Mat(_dims, _sz, DataType<_Tp>::type) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s)
- -    : Mat(_dims, _sz, DataType<_Tp>::type, Scalar(_s)) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges)
- -    : Mat(m, ranges) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Mat& m)
- -    : Mat() { flags = (flags & ~CV_MAT_TYPE_MASK) | DataType<_Tp>::type; *this = m; }
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Mat_& m)
- -    : Mat(m) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps)
- -    : Mat(_rows, _cols, DataType<_Tp>::type, _data, steps) {}
- -
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Mat_& m, const Range& _rowRange, const Range& _colRange)
- -    : Mat(m, _rowRange, _colRange) {}
+ +        //! hash value
+ +        size_t hashval;
+ +        //! index of the next node in the same hash table entry
+ +        size_t next;
+ +        //! index of the matrix element
+ +        int idx[MAX_DIM];
+ +    };
+ +
+ +    //! default constructor
+ +    SparseMat();
+ +    //! creates matrix of the specified size and type
+ +    SparseMat(int dims, const int* _sizes, int _type);
+ +    //! copy constructor
+ +    SparseMat(const SparseMat& m);
+ +    //! converts dense 2d matrix to the sparse form
+ +    /*!
+ +     \param m the input matrix
+ +    */
+ +    explicit SparseMat(const Mat& m);
+ +    //! converts old-style sparse matrix to the new-style. All the data is copied
+ +    //SparseMat(const CvSparseMat* m);
+ +    //! the destructor
+ +    ~SparseMat();
+ +
+ +    //! assignment operator. This is O(1) operation, i.e. no data is copied
+ +    SparseMat& operator = (const SparseMat& m);
+ +    //! equivalent to the corresponding constructor
+ +    SparseMat& operator = (const Mat& m);
+ +
+ +    //! creates full copy of the matrix
+ +    SparseMat clone() const;
+ +
+ +    //! copies all the data to the destination matrix. All the previous content of m is erased
+ +    void copyTo( SparseMat& m ) const;
+ +    //! converts sparse matrix to dense matrix.
+ +    void copyTo( Mat& m ) const;
+ +    //! multiplies all the matrix elements by the specified scale factor alpha and converts the results to the specified data type
+ +    void convertTo( SparseMat& m, int rtype, double alpha=1 ) const;
+ +    //! converts sparse matrix to dense n-dim matrix with optional type conversion and scaling.
+ +    /*!
+ +      \param rtype The output matrix data type. When it is =-1, the output array will have the same data type as (*this)
+ +      \param alpha The scale factor
+ +      \param beta The optional delta added to the scaled values before the conversion
+ +    */
+ +    void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
+ +
+ +    // not used now
+ +    void assignTo( SparseMat& m, int type=-1 ) const;
+ +
+ +    //! reallocates sparse matrix.
+ +    /*!
+ +        If the matrix already had the proper size and type,
+ +        it is simply cleared with clear(), otherwise,
+ +        the old matrix is released (using release()) and the new one is allocated.
+ +    */
+ +    void create(int dims, const int* _sizes, int _type);
+ +    //! sets all the sparse matrix elements to 0, which means clearing the hash table.
+ +    void clear();
+ +    //! manually increments the reference counter to the header.
+ +    void addref();
+ +    //! decrements the header reference counter. When the counter reaches 0, the header and all the underlying data are deallocated.
+ +    void release();
+ +
+ +    //! converts sparse matrix to the old-style representation; all the elements are copied.
+ +    //operator CvSparseMat*() const;
+ +    //! returns the size of each element in bytes (not including the overhead - the space occupied by SparseMat::Node elements)
+ +    size_t elemSize() const;
+ +    //! returns elemSize()/channels()
+ +    size_t elemSize1() const;
+ +
+ +    //! returns type of sparse matrix elements
+ +    int type() const;
+ +    //! returns the depth of sparse matrix elements
+ +    int depth() const;
+ +    //! returns the number of channels
+ +    int channels() const;
+ +
+ +    //! returns the array of sizes, or NULL if the matrix is not allocated
+ +    const int* size() const;
+ +    //! returns the size of i-th matrix dimension (or 0)
+ +    int size(int i) const;
+ +    //! returns the matrix dimensionality
+ +    int dims() const;
+ +    //! returns the number of non-zero elements (=the number of hash table nodes)
+ +    size_t nzcount() const;
+ +
+ +    //! computes the element hash value (1D case)
+ +    size_t hash(int i0) const;
+ +    //! computes the element hash value (2D case)
+ +    size_t hash(int i0, int i1) const;
+ +    //! computes the element hash value (3D case)
+ +    size_t hash(int i0, int i1, int i2) const;
+ +    //! computes the element hash value (nD case)
+ +    size_t hash(const int* idx) const;
+ +
+ +    //@{
+ +    /*!
+ +     specialized variants for 1D, 2D, 3D cases and the generic one for n-D case.
+ +
+ +     return pointer to the matrix element.
+ +     <ul>
+ +      <li>if the element is there (it's non-zero), the pointer to it is returned
+ +      <li>if it's not there and createMissing=false, NULL pointer is returned
+ +      <li>if it's not there and createMissing=true, then the new element
+ +        is created and initialized with 0. Pointer to it is returned
+ +      <li>if the optional hashval pointer is not NULL, the element hash value is
+ +      not computed, but *hashval is taken instead.
+ +     </ul>
+ +    */
+ +    //! returns pointer to the specified element (1D case)
+ +    uchar* ptr(int i0, bool createMissing, size_t* hashval=0);
+ +    //! returns pointer to the specified element (2D case)
+ +    uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0);
+ +    //! returns pointer to the specified element (3D case)
+ +    uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0);
+ +    //! returns pointer to the specified element (nD case)
+ +    uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0);
+ +    //@}
+ +
+ +    //@{
+ +    /*!
+ +     return read-write reference to the specified sparse matrix element.
+ +
+ +     ref<_Tp>(i0,...[,hashval]) is equivalent to *(_Tp*)ptr(i0,...,true[,hashval]).
+ +     The methods always return a valid reference.
+ +     If the element did not exist, it is created and initialized with 0.
+ +    */
+ +    //! returns reference to the specified element (1D case)
+ +    template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
+ +    //! returns reference to the specified element (2D case)
+ +    template<typename _Tp> _Tp& ref(int i0, int i1, size_t* hashval=0);
+ +    //! returns reference to the specified element (3D case)
+ +    template<typename _Tp> _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+ +    //! returns reference to the specified element (nD case)
+ +    template<typename _Tp> _Tp& ref(const int* idx, size_t* hashval=0);
+ +    //@}
+ +
+ +    //@{
+ +    /*!
+ +     return value of the specified sparse matrix element.
+ +
+ +     value<_Tp>(i0,...[,hashval]) is equivalent to
+ +
+ +     \code
+ +     { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); }
+ +     \endcode
+ +
+ +     That is, if the element did not exist, the methods return 0.
+ +     */
+ +    //! returns value of the specified element (1D case)
+ +    template<typename _Tp> _Tp value(int i0, size_t* hashval=0) const;
+ +    //! returns value of the specified element (2D case)
+ +    template<typename _Tp> _Tp value(int i0, int i1, size_t* hashval=0) const;
+ +    //! returns value of the specified element (3D case)
+ +    template<typename _Tp> _Tp value(int i0, int i1, int i2, size_t* hashval=0) const;
+ +    //! returns value of the specified element (nD case)
+ +    template<typename _Tp> _Tp value(const int* idx, size_t* hashval=0) const;
+ +    //@}
+ +
+ +    //@{
+ +    /*!
+ +     Return pointer to the specified sparse matrix element if it exists
+ +
+ +     find<_Tp>(i0,...[,hashval]) is equivalent to (const _Tp*)ptr(i0,...,false[,hashval]).
+ +
+ +     If the specified element does not exist, the methods return NULL.
+ +    */
+ +    //! returns pointer to the specified element (1D case)
+ +    template<typename _Tp> const _Tp* find(int i0, size_t* hashval=0) const;
+ +    //! returns pointer to the specified element (2D case)
+ +    template<typename _Tp> const _Tp* find(int i0, int i1, size_t* hashval=0) const;
+ +    //! returns pointer to the specified element (3D case)
+ +    template<typename _Tp> const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const;
+ +    //! returns pointer to the specified element (nD case)
+ +    template<typename _Tp> const _Tp* find(const int* idx, size_t* hashval=0) const;
+ +    //@}
+ +
+ +    //! erases the specified element (2D case)
+ +    void erase(int i0, int i1, size_t* hashval=0);
+ +    //! erases the specified element (3D case)
+ +    void erase(int i0, int i1, int i2, size_t* hashval=0);
+ +    //! erases the specified element (nD case)
+ +    void erase(const int* idx, size_t* hashval=0);
+ +
+ +    //@{
+ +    /*!
+ +       return the sparse matrix iterator pointing to the first sparse matrix element
+ +    */
+ +    //! returns the sparse matrix iterator at the matrix beginning
+ +    SparseMatIterator begin();
+ +    //! returns the sparse matrix iterator at the matrix beginning
+ +    template<typename _Tp> SparseMatIterator_<_Tp> begin();
+ +    //! returns the read-only sparse matrix iterator at the matrix beginning
+ +    SparseMatConstIterator begin() const;
+ +    //! returns the read-only sparse matrix iterator at the matrix beginning
+ +    template<typename _Tp> SparseMatConstIterator_<_Tp> begin() const;
+ +    //@}
+ +    /*!
+ +       return the sparse matrix iterator pointing to the element following the last sparse matrix element
+ +    */
+ +    //! returns the sparse matrix iterator at the matrix end
+ +    SparseMatIterator end();
+ +    //! returns the read-only sparse matrix iterator at the matrix end
+ +    SparseMatConstIterator end() const;
+ +    //! returns the typed sparse matrix iterator at the matrix end
+ +    template<typename _Tp> SparseMatIterator_<_Tp> end();
+ +    //! returns the typed read-only sparse matrix iterator at the matrix end
+ +    template<typename _Tp> SparseMatConstIterator_<_Tp> end() const;
+ +
+ +    //! returns the value stored in the sparse matrix node
+ +    template<typename _Tp> _Tp& value(Node* n);
+ +    //! returns the value stored in the sparse matrix node
+ +    template<typename _Tp> const _Tp& value(const Node* n) const;
+ +
+ +    ////////////// some internal-use methods ///////////////
+ +    Node* node(size_t nidx);
+ +    const Node* node(size_t nidx) const;
+ +
+ +    uchar* newNode(const int* idx, size_t hashval);
+ +    void removeNode(size_t hidx, size_t nidx, size_t previdx);
+ +    void resizeHashTab(size_t newsize);
   
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi)
- -    : Mat(m, roi) {}
+ +    int flags;
+ +    Hdr* hdr;
+ +};
   
- -template<typename _Tp> template<int n> inline
- -    Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData)
- -    : Mat(n/DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&vec)
- -{
- -    CV_Assert(n%DataType<_Tp>::channels == 0);
- -    if( copyData )
- -        *this = clone();
- -}
- -
- -template<typename _Tp> template<int m, int n> inline
- -    Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type,m,n>& M, bool copyData)
- -    : Mat(m, n/DataType<_Tp>::channels, DataType<_Tp>::type, (void*)&M)
- -{
- -    CV_Assert(n % DataType<_Tp>::channels == 0);
- -    if( copyData )
- -        *this = clone();
- -}
   
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
- -    : Mat(2/DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
- -{
- -    CV_Assert(2 % DataType<_Tp>::channels == 0);
- -    if( copyData )
- -        *this = clone();
- -}
   
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
- -    : Mat(3/DataType<_Tp>::channels, 1, DataType<_Tp>::type, (void*)&pt)
- -{
- -    CV_Assert(3 % DataType<_Tp>::channels == 0);
- -    if( copyData )
- -        *this = clone();
- -}
+ +///////////////////////////////// SparseMat_<_Tp> ////////////////////////////////////
   
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const MatCommaInitializer_<_Tp>& commaInitializer)
- -    : Mat(commaInitializer) {}
+ +/*!
+ + The Template Sparse Matrix class derived from cv::SparseMat
   
- -template<typename _Tp> inline Mat_<_Tp>::Mat_(const vector<_Tp>& vec, bool copyData)
- -    : Mat(vec, copyData) {}
+ + The class provides slightly more convenient operations for accessing elements.
   
- -template<typename _Tp> inline Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
- -{
- -    if( DataType<_Tp>::type == m.type() )
- -    {
- -        Mat::operator = (m);
- -        return *this;
- -    }
- -    if( DataType<_Tp>::depth == m.depth() )
- -    {
- -        return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0));
- -    }
- -    CV_DbgAssert(DataType<_Tp>::channels == m.channels());
- -    m.convertTo(*this, type());
- -    return *this;
- -}
- -
- -template<typename _Tp> inline Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat_& m)
+ + \code
+ + SparseMat m;
+ + ...
+ + SparseMat_<int> m_ = (SparseMat_<int>&)m;
+ + m_.ref(1)++; // equivalent to m.ref<int>(1)++;
+ + m_.ref(2) += m_(3); // equivalent to m.ref<int>(2) += m.value<int>(3);
+ + \endcode
+ +*/
+ +template<typename _Tp> class CV_EXPORTS SparseMat_ : public SparseMat
   {
- -    Mat::operator=(m);
- -    return *this;
- -}
+ +public:
+ +    typedef SparseMatIterator_<_Tp> iterator;
+ +    typedef SparseMatConstIterator_<_Tp> const_iterator;
+ +
+ +    //! the default constructor
+ +    SparseMat_();
+ +    //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type)
+ +    SparseMat_(int dims, const int* _sizes);
+ +    //! the copy constructor. If DataType<_Tp>::type != m.type(), the m elements are converted
+ +    SparseMat_(const SparseMat& m);
+ +    //! the copy constructor. This is O(1) operation - no data is copied
+ +    SparseMat_(const SparseMat_& m);
+ +    //! converts dense matrix to the sparse form
+ +    SparseMat_(const Mat& m);
+ +    //! converts the old-style sparse matrix to the C++ class. All the elements are copied
+ +    //SparseMat_(const CvSparseMat* m);
+ +    //! the assignment operator. If DataType<_Tp>::type != m.type(), the m elements are converted
+ +    SparseMat_& operator = (const SparseMat& m);
+ +    //! the assignment operator. This is O(1) operation - no data is copied
+ +    SparseMat_& operator = (const SparseMat_& m);
+ +    //! converts dense matrix to the sparse form
+ +    SparseMat_& operator = (const Mat& m);
+ +
+ +    //! makes full copy of the matrix. All the elements are duplicated
+ +    SparseMat_ clone() const;
+ +    //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type)
+ +    void create(int dims, const int* _sizes);
+ +    //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied
+ +    //operator CvSparseMat*() const;
+ +
+ +    //! returns type of the matrix elements
+ +    int type() const;
+ +    //! returns depth of the matrix elements
+ +    int depth() const;
+ +    //! returns the number of channels in each matrix element
+ +    int channels() const;
+ +
+ +    //! equivalent to SparseMat::ref<_Tp>(i0, hashval)
+ +    _Tp& ref(int i0, size_t* hashval=0);
+ +    //! equivalent to SparseMat::ref<_Tp>(i0, i1, hashval)
+ +    _Tp& ref(int i0, int i1, size_t* hashval=0);
+ +    //! equivalent to SparseMat::ref<_Tp>(i0, i1, i2, hashval)
+ +    _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+ +    //! equivalent to SparseMat::ref<_Tp>(idx, hashval)
+ +    _Tp& ref(const int* idx, size_t* hashval=0);
+ +
+ +    //! equivalent to SparseMat::value<_Tp>(i0, hashval)
+ +    _Tp operator()(int i0, size_t* hashval=0) const;
+ +    //! equivalent to SparseMat::value<_Tp>(i0, i1, hashval)
+ +    _Tp operator()(int i0, int i1, size_t* hashval=0) const;
+ +    //! equivalent to SparseMat::value<_Tp>(i0, i1, i2, hashval)
+ +    _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const;
+ +    //! equivalent to SparseMat::value<_Tp>(idx, hashval)
+ +    _Tp operator()(const int* idx, size_t* hashval=0) const;
+ +
+ +    //! returns sparse matrix iterator pointing to the first sparse matrix element
+ +    SparseMatIterator_<_Tp> begin();
+ +    //! returns read-only sparse matrix iterator pointing to the first sparse matrix element
+ +    SparseMatConstIterator_<_Tp> begin() const;
+ +    //! returns sparse matrix iterator pointing to the element following the last sparse matrix element
+ +    SparseMatIterator_<_Tp> end();
+ +    //! returns read-only sparse matrix iterator pointing to the element following the last sparse matrix element
+ +    SparseMatConstIterator_<_Tp> end() const;
+ +};
   
- -template<typename _Tp> inline Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s)
- -{
- -    typedef typename DataType<_Tp>::vec_type VT;
- -    Mat::operator=(Scalar((const VT&)s));
- -    return *this;
- -}
   
- -template<typename _Tp> inline void Mat_<_Tp>::create(int _rows, int _cols)
- -{
- -    Mat::create(_rows, _cols, DataType<_Tp>::type);
- -}
   
- -template<typename _Tp> inline void Mat_<_Tp>::create(Size _sz)
- -{
- -    Mat::create(_sz, DataType<_Tp>::type);
- -}
+ +////////////////////////////////// MatConstIterator //////////////////////////////////
   
- -template<typename _Tp> inline void Mat_<_Tp>::create(int _dims, const int* _sz)
+ +class CV_EXPORTS MatConstIterator
   {
- -    Mat::create(_dims, _sz, DataType<_Tp>::type);
- -}
+ +public:
+ +    typedef uchar* value_type;
+ +    typedef ptrdiff_t difference_type;
+ +    typedef const uchar** pointer;
+ +    typedef uchar* reference;
   
+ +#ifndef OPENCV_NOSTL
+ +    typedef std::random_access_iterator_tag iterator_category;
+ +#endif
   
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const
- -{ return Mat_<_Tp>(Mat::cross(m)); }
+ +    //! default constructor
+ +    MatConstIterator();
+ +    //! constructor that sets the iterator to the beginning of the matrix
+ +    MatConstIterator(const Mat* _m);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator(const Mat* _m, int _row, int _col=0);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator(const Mat* _m, Point _pt);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator(const Mat* _m, const int* _idx);
+ +    //! copy constructor
+ +    MatConstIterator(const MatConstIterator& it);
+ +
+ +    //! copy operator
+ +    MatConstIterator& operator = (const MatConstIterator& it);
+ +    //! returns the current matrix element
+ +    uchar* operator *() const;
+ +    //! returns the i-th matrix element, relative to the current
+ +    uchar* operator [](ptrdiff_t i) const;
+ +
+ +    //! shifts the iterator forward by the specified number of elements
+ +    MatConstIterator& operator += (ptrdiff_t ofs);
+ +    //! shifts the iterator backward by the specified number of elements
+ +    MatConstIterator& operator -= (ptrdiff_t ofs);
+ +    //! decrements the iterator
+ +    MatConstIterator& operator --();
+ +    //! decrements the iterator
+ +    MatConstIterator operator --(int);
+ +    //! increments the iterator
+ +    MatConstIterator& operator ++();
+ +    //! increments the iterator
+ +    MatConstIterator operator ++(int);
+ +    //! returns the current iterator position
+ +    Point pos() const;
+ +    //! returns the current iterator position
+ +    void pos(int* _idx) const;
+ +
+ +    ptrdiff_t lpos() const;
+ +    void seek(ptrdiff_t ofs, bool relative = false);
+ +    void seek(const int* _idx, bool relative = false);
+ +
+ +    const Mat* m;
+ +    size_t elemSize;
+ +    uchar* ptr;
+ +    uchar* sliceStart;
+ +    uchar* sliceEnd;
+ +};
   
- -template<typename _Tp> template<typename T2> inline Mat_<_Tp>::operator Mat_<T2>() const
- -{ return Mat_<T2>(*this); }
   
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::row(int y) const
- -{ return Mat_(*this, Range(y, y+1), Range::all()); }
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::col(int x) const
- -{ return Mat_(*this, Range::all(), Range(x, x+1)); }
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::diag(int d) const
- -{ return Mat_(Mat::diag(d)); }
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::clone() const
- -{ return Mat_(Mat::clone()); }
   
- -template<typename _Tp> inline size_t Mat_<_Tp>::elemSize() const
- -{
- -    CV_DbgAssert( Mat::elemSize() == sizeof(_Tp) );
- -    return sizeof(_Tp);
- -}
+ +////////////////////////////////// MatConstIterator_ /////////////////////////////////
   
- -template<typename _Tp> inline size_t Mat_<_Tp>::elemSize1() const
+ +/*!
+ + Matrix read-only iterator
+ + */
+ +template<typename _Tp>
+ +class MatConstIterator_ : public MatConstIterator
   {
- -    CV_DbgAssert( Mat::elemSize1() == sizeof(_Tp)/DataType<_Tp>::channels );
- -    return sizeof(_Tp)/DataType<_Tp>::channels;
- -}
- -template<typename _Tp> inline int Mat_<_Tp>::type() const
- -{
- -    CV_DbgAssert( Mat::type() == DataType<_Tp>::type );
- -    return DataType<_Tp>::type;
- -}
- -template<typename _Tp> inline int Mat_<_Tp>::depth() const
- -{
- -    CV_DbgAssert( Mat::depth() == DataType<_Tp>::depth );
- -    return DataType<_Tp>::depth;
- -}
- -template<typename _Tp> inline int Mat_<_Tp>::channels() const
- -{
- -    CV_DbgAssert( Mat::channels() == DataType<_Tp>::channels );
- -    return DataType<_Tp>::channels;
- -}
- -template<typename _Tp> inline size_t Mat_<_Tp>::stepT(int i) const { return step.p[i]/elemSize(); }
- -template<typename _Tp> inline size_t Mat_<_Tp>::step1(int i) const { return step.p[i]/elemSize1(); }
+ +public:
+ +    typedef _Tp value_type;
+ +    typedef ptrdiff_t difference_type;
+ +    typedef const _Tp* pointer;
+ +    typedef const _Tp& reference;
   
- -template<typename _Tp> inline Mat_<_Tp>& Mat_<_Tp>::adjustROI( int dtop, int dbottom, int dleft, int dright )
- -{ return (Mat_<_Tp>&)(Mat::adjustROI(dtop, dbottom, dleft, dright));  }
+ +#ifndef OPENCV_NOSTL
+ +    typedef std::random_access_iterator_tag iterator_category;
+ +#endif
   
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::operator()( const Range& _rowRange, const Range& _colRange ) const
- -{ return Mat_<_Tp>(*this, _rowRange, _colRange); }
+ +    //! default constructor
+ +    MatConstIterator_();
+ +    //! constructor that sets the iterator to the beginning of the matrix
+ +    MatConstIterator_(const Mat_<_Tp>* _m);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col=0);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator_(const Mat_<_Tp>* _m, Point _pt);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatConstIterator_(const Mat_<_Tp>* _m, const int* _idx);
+ +    //! copy constructor
+ +    MatConstIterator_(const MatConstIterator_& it);
+ +
+ +    //! copy operator
+ +    MatConstIterator_& operator = (const MatConstIterator_& it);
+ +    //! returns the current matrix element
+ +    _Tp operator *() const;
+ +    //! returns the i-th matrix element, relative to the current
+ +    _Tp operator [](ptrdiff_t i) const;
+ +
+ +    //! shifts the iterator forward by the specified number of elements
+ +    MatConstIterator_& operator += (ptrdiff_t ofs);
+ +    //! shifts the iterator backward by the specified number of elements
+ +    MatConstIterator_& operator -= (ptrdiff_t ofs);
+ +    //! decrements the iterator
+ +    MatConstIterator_& operator --();
+ +    //! decrements the iterator
+ +    MatConstIterator_ operator --(int);
+ +    //! increments the iterator
+ +    MatConstIterator_& operator ++();
+ +    //! increments the iterator
+ +    MatConstIterator_ operator ++(int);
+ +    //! returns the current iterator position
+ +    Point pos() const;
+ +};
   
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::operator()( const Rect& roi ) const
- -{ return Mat_<_Tp>(*this, roi); }
   
- -template<typename _Tp> inline Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const
- -{ return Mat_<_Tp>(*this, ranges); }
   
- -template<typename _Tp> inline _Tp* Mat_<_Tp>::operator [](int y)
- -{ return (_Tp*)ptr(y); }
- -template<typename _Tp> inline const _Tp* Mat_<_Tp>::operator [](int y) const
- -{ return (const _Tp*)ptr(y); }
+ +//////////////////////////////////// MatIterator_ ////////////////////////////////////
   
- -template<typename _Tp> inline _Tp& Mat_<_Tp>::operator ()(int i0, int i1)
- -{
- -    CV_DbgAssert( dims <= 2 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  type() == DataType<_Tp>::type );
- -    return ((_Tp*)(data + step.p[0]*i0))[i1];
- -}
- -
- -template<typename _Tp> inline const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const
- -{
- -    CV_DbgAssert( dims <= 2 && data &&
- -                  (unsigned)i0 < (unsigned)size.p[0] &&
- -                  (unsigned)i1 < (unsigned)size.p[1] &&
- -                  type() == DataType<_Tp>::type );
- -    return ((const _Tp*)(data + step.p[0]*i0))[i1];
- -}
- -
- -template<typename _Tp> inline _Tp& Mat_<_Tp>::operator ()(Point pt)
+ +/*!
+ + Matrix read-write iterator
+ +*/
+ +template<typename _Tp>
+ +class MatIterator_ : public MatConstIterator_<_Tp>
   {
- -    CV_DbgAssert( dims <= 2 && data &&
- -                  (unsigned)pt.y < (unsigned)size.p[0] &&
- -                  (unsigned)pt.x < (unsigned)size.p[1] &&
- -                  type() == DataType<_Tp>::type );
- -    return ((_Tp*)(data + step.p[0]*pt.y))[pt.x];
- -}
- -
- -template<typename _Tp> inline const _Tp& Mat_<_Tp>::operator ()(Point pt) const
- -{
- -    CV_DbgAssert( dims <= 2 && data &&
- -                  (unsigned)pt.y < (unsigned)size.p[0] &&
- -                  (unsigned)pt.x < (unsigned)size.p[1] &&
- -                 type() == DataType<_Tp>::type );
- -    return ((const _Tp*)(data + step.p[0]*pt.y))[pt.x];
- -}
- -
- -template<typename _Tp> inline _Tp& Mat_<_Tp>::operator ()(const int* idx)
- -{
- -    return Mat::at<_Tp>(idx);
- -}
+ +public:
+ +    typedef _Tp* pointer;
+ +    typedef _Tp& reference;
   
- -template<typename _Tp> inline const _Tp& Mat_<_Tp>::operator ()(const int* idx) const
- -{
- -    return Mat::at<_Tp>(idx);
- -}
+ +#ifndef OPENCV_NOSTL
+ +    typedef std::random_access_iterator_tag iterator_category;
+ +#endif
   
- -template<typename _Tp> template<int n> inline _Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx)
- -{
- -    return Mat::at<_Tp>(idx);
- -}
+ +    //! the default constructor
+ +    MatIterator_();
+ +    //! constructor that sets the iterator to the beginning of the matrix
+ +    MatIterator_(Mat_<_Tp>* _m);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatIterator_(const Mat_<_Tp>* _m, Point _pt);
+ +    //! constructor that sets the iterator to the specified element of the matrix
+ +    MatIterator_(const Mat_<_Tp>* _m, const int* _idx);
+ +    //! copy constructor
+ +    MatIterator_(const MatIterator_& it);
+ +    //! copy operator
+ +    MatIterator_& operator = (const MatIterator_<_Tp>& it );
+ +
+ +    //! returns the current matrix element
+ +    _Tp& operator *() const;
+ +    //! returns the i-th matrix element, relative to the current
+ +    _Tp& operator [](ptrdiff_t i) const;
+ +
+ +    //! shifts the iterator forward by the specified number of elements
+ +    MatIterator_& operator += (ptrdiff_t ofs);
+ +    //! shifts the iterator backward by the specified number of elements
+ +    MatIterator_& operator -= (ptrdiff_t ofs);
+ +    //! decrements the iterator
+ +    MatIterator_& operator --();
+ +    //! decrements the iterator
+ +    MatIterator_ operator --(int);
+ +    //! increments the iterator
+ +    MatIterator_& operator ++();
+ +    //! increments the iterator
+ +    MatIterator_ operator ++(int);
+ +};
   
- -template<typename _Tp> template<int n> inline const _Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx) const
- -{
- -    return Mat::at<_Tp>(idx);
- -}
   
- -template<typename _Tp> inline _Tp& Mat_<_Tp>::operator ()(int i0)
- -{
- -    return this->at<_Tp>(i0);
- -}
   
- -template<typename _Tp> inline const _Tp& Mat_<_Tp>::operator ()(int i0) const
- -{
- -    return this->at<_Tp>(i0);
- -}
+ +/////////////////////////////// SparseMatConstIterator ///////////////////////////////
   
- -template<typename _Tp> inline _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2)
- -{
- -    return this->at<_Tp>(i0, i1, i2);
- -}
+ +/*!
+ + Read-Only Sparse Matrix Iterator.
+ + Here is how to use the iterator to compute the sum of floating-point sparse matrix elements:
   
- -template<typename _Tp> inline const _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) const
+ + \code
+ + SparseMatConstIterator it = m.begin(), it_end = m.end();
+ + double s = 0;
+ + CV_Assert( m.type() == CV_32F );
+ + for( ; it != it_end; ++it )
+ +    s += it.value<float>();
+ + \endcode
+ +*/
+ +class CV_EXPORTS SparseMatConstIterator
   {
- -    return this->at<_Tp>(i0, i1, i2);
- -}
+ +public:
+ +    //! the default constructor
+ +    SparseMatConstIterator();
+ +    //! the full constructor setting the iterator to the first sparse matrix element
+ +    SparseMatConstIterator(const SparseMat* _m);
+ +    //! the copy constructor
+ +    SparseMatConstIterator(const SparseMatConstIterator& it);
+ +
+ +    //! the assignment operator
+ +    SparseMatConstIterator& operator = (const SparseMatConstIterator& it);
+ +
+ +    //! template method returning the current matrix element
+ +    template<typename _Tp> const _Tp& value() const;
+ +    //! returns the current node of the sparse matrix. it.node()->idx is the current element index
+ +    const SparseMat::Node* node() const;
+ +
+ +    //! moves iterator to the previous element
+ +    SparseMatConstIterator& operator --();
+ +    //! moves iterator to the previous element
+ +    SparseMatConstIterator operator --(int);
+ +    //! moves iterator to the next element
+ +    SparseMatConstIterator& operator ++();
+ +    //! moves iterator to the next element
+ +    SparseMatConstIterator operator ++(int);
+ +
+ +    //! moves iterator to the element after the last element
+ +    void seekEnd();
+ +
+ +    const SparseMat* m;
+ +    size_t hashidx;
+ +    uchar* ptr;
+ +};
   
   
- -template<typename _Tp> inline Mat_<_Tp>::operator vector<_Tp>() const
- -{
- -    vector<_Tp> v;
- -    copyTo(v);
- -    return v;
- -}
   
- -template<typename _Tp> template<int n> inline Mat_<_Tp>::operator Vec<typename DataType<_Tp>::channel_type, n>() const
- -{
- -    CV_Assert(n % DataType<_Tp>::channels == 0);
- -    return this->Mat::operator Vec<typename DataType<_Tp>::channel_type, n>();
- -}
+ +////////////////////////////////// SparseMatIterator /////////////////////////////////
   
- -template<typename _Tp> template<int m, int n> inline Mat_<_Tp>::operator Matx<typename DataType<_Tp>::channel_type, m, n>() const
- -{
- -    CV_Assert(n % DataType<_Tp>::channels == 0);
- -    return this->Mat::operator Matx<typename DataType<_Tp>::channel_type, m, n>();
- -}
+ +/*!
+ + Read-write Sparse Matrix Iterator
   
- -template<typename T1, typename T2, typename Op> inline void
- -process( const Mat_<T1>& m1, Mat_<T2>& m2, Op op )
+ + The class is similar to cv::SparseMatConstIterator,
+ + but can be used for in-place modification of the matrix elements.
+ +*/
+ +class CV_EXPORTS SparseMatIterator : public SparseMatConstIterator
   {
- -    int y, x, rows = m1.rows, cols = m1.cols;
+ +public:
+ +    //! the default constructor
+ +    SparseMatIterator();
+ +    //! the full constructor setting the iterator to the first sparse matrix element
+ +    SparseMatIterator(SparseMat* _m);
+ +    //! the full constructor setting the iterator to the specified sparse matrix element
+ +    SparseMatIterator(SparseMat* _m, const int* idx);
+ +    //! the copy constructor
+ +    SparseMatIterator(const SparseMatIterator& it);
+ +
+ +    //! the assignment operator
+ +    SparseMatIterator& operator = (const SparseMatIterator& it);
+ +    //! returns read-write reference to the current sparse matrix element
+ +    template<typename _Tp> _Tp& value() const;
+ +    //! returns pointer to the current sparse matrix node. it.node()->idx is the index of the current element (do not modify it!)
+ +    SparseMat::Node* node() const;
+ +
+ +    //! moves iterator to the next element
+ +    SparseMatIterator& operator ++();
+ +    //! moves iterator to the next element
+ +    SparseMatIterator operator ++(int);
+ +};
   
- -    CV_DbgAssert( m1.size() == m2.size() );
   
- -    for( y = 0; y < rows; y++ )
- -    {
- -        const T1* src = m1[y];
- -        T2* dst = m2[y];
   
- -        for( x = 0; x < cols; x++ )
- -            dst[x] = op(src[x]);
- -    }
- -}
+ +/////////////////////////////// SparseMatConstIterator_ //////////////////////////////
   
- -template<typename T1, typename T2, typename T3, typename Op> inline void
- -process( const Mat_<T1>& m1, const Mat_<T2>& m2, Mat_<T3>& m3, Op op )
- -{
- -    int y, x, rows = m1.rows, cols = m1.cols;
+ +/*!
+ + Template Read-Only Sparse Matrix Iterator Class.
   
- -    CV_DbgAssert( m1.size() == m2.size() );
+ + This class is derived from cv::SparseMatConstIterator and
+ + introduces a more convenient operator *() for accessing the current element.
+ +*/
+ +template<typename _Tp> class SparseMatConstIterator_ : public SparseMatConstIterator
+ +{
+ +public:
   
- -    for( y = 0; y < rows; y++ )
- -    {
- -        const T1* src1 = m1[y];
- -        const T2* src2 = m2[y];
- -        T3* dst = m3[y];
+ +#ifndef OPENCV_NOSTL
+ +    typedef std::forward_iterator_tag iterator_category;
+ +#endif
   
- -        for( x = 0; x < cols; x++ )
- -            dst[x] = op( src1[x], src2[x] );
- -    }
- -}
+ +    //! the default constructor
+ +    SparseMatConstIterator_();
+ +    //! the full constructor setting the iterator to the first sparse matrix element
+ +    SparseMatConstIterator_(const SparseMat_<_Tp>* _m);
+ +    SparseMatConstIterator_(const SparseMat* _m);
+ +    //! the copy constructor
+ +    SparseMatConstIterator_(const SparseMatConstIterator_& it);
+ +
+ +    //! the assignment operator
+ +    SparseMatConstIterator_& operator = (const SparseMatConstIterator_& it);
+ +    //! the element access operator
+ +    const _Tp& operator *() const;
+ +
+ +    //! moves iterator to the next element
+ +    SparseMatConstIterator_& operator ++();
+ +    //! moves iterator to the next element
+ +    SparseMatConstIterator_ operator ++(int);
+ +};
   
   
- -/////////////////////////////// Input/Output Arrays /////////////////////////////////
   
- -template<typename _Tp> inline _InputArray::_InputArray(const vector<_Tp>& vec)
- -    : flags(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type), obj((void*)&vec) {}
+ +///////////////////////////////// SparseMatIterator_ /////////////////////////////////
   
- -template<typename _Tp> inline _InputArray::_InputArray(const vector<vector<_Tp> >& vec)
- -    : flags(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type), obj((void*)&vec) {}
+ +/*!
+ + Template Read-Write Sparse Matrix Iterator Class.
   
- -template<typename _Tp> inline _InputArray::_InputArray(const vector<Mat_<_Tp> >& vec)
- -    : flags(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type), obj((void*)&vec) {}
+ + This class is derived from cv::SparseMatConstIterator_ and
+ + introduces a more convenient operator *() for accessing the current element.
+ +*/
+ +template<typename _Tp> class CV_EXPORTS SparseMatIterator_ : public SparseMatConstIterator_<_Tp>
+ +{
+ +public:
   
- -template<typename _Tp, int m, int n> inline _InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
- -    : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)&mtx), sz(n, m) {}
+ +#ifndef OPENCV_NOSTL
+ +    typedef std::forward_iterator_tag iterator_category;
+ +#endif
   
- -template<typename _Tp> inline _InputArray::_InputArray(const _Tp* vec, int n)
- -    : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)vec), sz(n, 1) {}
+ +    //! the default constructor
+ +    SparseMatIterator_();
+ +    //! the full constructor setting the iterator to the first sparse matrix element
+ +    SparseMatIterator_(SparseMat_<_Tp>* _m);
+ +    SparseMatIterator_(SparseMat* _m);
+ +    //! the copy constructor
+ +    SparseMatIterator_(const SparseMatIterator_& it);
+ +
+ +    //! the assignment operator
+ +    SparseMatIterator_& operator = (const SparseMatIterator_& it);
+ +    //! returns the reference to the current element
+ +    _Tp& operator *() const;
+ +
+ +    //! moves the iterator to the next element
+ +    SparseMatIterator_& operator ++();
+ +    //! moves the iterator to the next element
+ +    SparseMatIterator_ operator ++(int);
+ +};
   
- -inline _InputArray::_InputArray(const Scalar& s)
- -    : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&s), sz(1, 4) {}
   
- -template<typename _Tp> inline _InputArray::_InputArray(const Mat_<_Tp>& m)
- -    : flags(FIXED_TYPE + MAT + DataType<_Tp>::type), obj((void*)&m) {}
   
- -template<typename _Tp> inline _OutputArray::_OutputArray(vector<_Tp>& vec)
- -    : _InputArray(vec) {}
- -template<typename _Tp> inline _OutputArray::_OutputArray(vector<vector<_Tp> >& vec)
- -    : _InputArray(vec) {}
- -template<typename _Tp> inline _OutputArray::_OutputArray(vector<Mat_<_Tp> >& vec)
- -    : _InputArray(vec) {}
- -template<typename _Tp> inline _OutputArray::_OutputArray(Mat_<_Tp>& m)
- -    : _InputArray(m) {}
- -template<typename _Tp, int m, int n> inline _OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
- -    : _InputArray(mtx) {}
- -template<typename _Tp> inline _OutputArray::_OutputArray(_Tp* vec, int n)
- -    : _InputArray(vec, n) {}
+ +/////////////////////////////////// NAryMatIterator //////////////////////////////////
+ +
+ +/*!
+ + n-Dimensional Dense Matrix Iterator Class.
+ +
+ + The class cv::NAryMatIterator is used for iterating over one or more n-dimensional dense arrays (cv::Mat's).
+ +
+ + The iterator is completely different from cv::Mat_ and cv::SparseMat_ iterators.
+ + It iterates through the slices (or planes), not the elements, where "slice" is a continuous part of the arrays.
+ +
+ + Here is an example of how the iterator can be used to normalize a 3D histogram:
+ +
+ + \code
+ + void normalizeColorHist(Mat& hist)
+ + {
+ + #if 1
+ +     // initialize iterator (the style is different from STL).
+ +     // after initialization the iterator will contain
+ +     // the number of slices or planes
+ +     // the iterator will go through
+ +     const Mat* arrays[] = { &hist, 0 };
+ +     Mat planes[1];
+ +     NAryMatIterator it(arrays, planes);
+ +     double s = 0;
+ +     // iterate through the matrix. on each iteration
+ +     // it.planes[i] (of type Mat) will be set to the current plane of
+ +     // i-th n-dim matrix passed to the iterator constructor.
+ +     for(int p = 0; p < it.nplanes; p++, ++it)
+ +        s += sum(it.planes[0])[0];
+ +     it = NAryMatIterator(arrays, planes);
+ +     s = 1./s;
+ +     for(int p = 0; p < it.nplanes; p++, ++it)
+ +        it.planes[0] *= s;
+ + #elif 1
+ +     // this is a shorter implementation of the above
+ +     // using built-in operations on Mat
+ +     double s = sum(hist)[0];
+ +     hist.convertTo(hist, hist.type(), 1./s, 0);
+ + #else
+ +     // and this is even shorter one
+ +     // (assuming that the histogram elements are non-negative)
+ +     normalize(hist, hist, 1, 0, NORM_L1);
+ + #endif
+ + }
+ + \endcode
+ +
+ + You can iterate through several matrices simultaneously as long as they have the same geometry
+ + (dimensionality and all the dimension sizes are the same), which is useful for binary
+ + and n-ary operations on such matrices. Just pass those matrices to cv::NAryMatIterator.
+ + Then, during the iteration it.planes[0], it.planes[1], ... will
+ + be the slices of the corresponding matrices.
+ +*/
+ +class CV_EXPORTS NAryMatIterator
+ +{
+ +public:
+ +    //! the default constructor
+ +    NAryMatIterator();
+ +    //! the full constructor taking arbitrary number of n-dim matrices
+ +    NAryMatIterator(const Mat** arrays, uchar** ptrs, int narrays=-1);
+ +    //! the full constructor taking arbitrary number of n-dim matrices
+ +    NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1);
+ +    //! the separate iterator initialization method
+ +    void init(const Mat** arrays, Mat* planes, uchar** ptrs, int narrays=-1);
+ +
+ +    //! proceeds to the next plane of every iterated matrix
+ +    NAryMatIterator& operator ++();
+ +    //! proceeds to the next plane of every iterated matrix (postfix increment operator)
+ +    NAryMatIterator operator ++(int);
+ +
+ +    //! the iterated arrays
+ +    const Mat** arrays;
+ +    //! the current planes
+ +    Mat* planes;
+ +    //! data pointers
+ +    uchar** ptrs;
+ +    //! the number of arrays
+ +    int narrays;
+ +    //! the number of hyper-planes that the iterator steps through
+ +    size_t nplanes;
+ +    //! the size of each segment (in elements)
+ +    size_t size;
+ +protected:
+ +    int iterdepth;
+ +    size_t idx;
+ +};
   
- -template<typename _Tp> inline _OutputArray::_OutputArray(const vector<_Tp>& vec)
- -    : _InputArray(vec) {flags |= FIXED_SIZE;}
- -template<typename _Tp> inline _OutputArray::_OutputArray(const vector<vector<_Tp> >& vec)
- -    : _InputArray(vec) {flags |= FIXED_SIZE;}
- -template<typename _Tp> inline _OutputArray::_OutputArray(const vector<Mat_<_Tp> >& vec)
- -    : _InputArray(vec) {flags |= FIXED_SIZE;}
   
- -template<typename _Tp> inline _OutputArray::_OutputArray(const Mat_<_Tp>& m)
- -    : _InputArray(m) {flags |= FIXED_SIZE;}
- -template<typename _Tp, int m, int n> inline _OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
- -    : _InputArray(mtx) {}
- -template<typename _Tp> inline _OutputArray::_OutputArray(const _Tp* vec, int n)
- -    : _InputArray(vec, n) {}
   
- -//////////////////////////////////// Matrix Expressions /////////////////////////////////////////
+ +///////////////////////////////// Matrix Expressions /////////////////////////////////
   
   class CV_EXPORTS MatOp
   {
diff --cc modules/core/include/opencv2/core/private.hpp

index 12961b3,0000000..6d3cd9b

mode 100644,000000..100644
--- 1/modules/core/include/opencv2/core/private.hpp
--- /dev/null
+++ b/modules/core/include/opencv2/core/private.hpp
@@@ -1,364 -1,0 +1,388 @@@
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                          License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_CORE_PRIVATE_HPP__
+ +#define __OPENCV_CORE_PRIVATE_HPP__
+ +
+ +#ifndef __OPENCV_BUILD
+ +#  error this is a private header which should not be used from outside of the OpenCV library
+ +#endif
+ +
+ +#include "opencv2/core.hpp"
+ +#include "cvconfig.h"
+ +
+ +#ifdef HAVE_EIGEN
+ +#  if defined __GNUC__ && defined __APPLE__
+ +#    pragma GCC diagnostic ignored "-Wshadow"
+ +#  endif
+ +#  include <Eigen/Core>
+ +#  include "opencv2/core/eigen.hpp"
+ +#endif
+ +
+ +#ifdef HAVE_TBB
+ +#  include "tbb/tbb_stddef.h"
+ +#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
+ +#    include "tbb/tbb.h"
+ +#    include "tbb/task.h"
+ +#    undef min
+ +#    undef max
+ +#  else
+ +#    undef HAVE_TBB
+ +#  endif
+ +#endif
+ +
++#ifdef _OPENMP
++#  define HAVE_OPENMP
++#endif
++
++#ifdef __APPLE__
++#  define HAVE_GCD
++#endif
++
++#if defined _MSC_VER && _MSC_VER >= 1600
++#  define HAVE_CONCURRENCY
++#endif
++
++#if defined HAVE_TBB
++#  define CV_PARALLEL_FRAMEWORK "tbb"
++#elif defined HAVE_CSTRIPES
++#  define CV_PARALLEL_FRAMEWORK "cstripes"
++#elif defined HAVE_OPENMP
++#  define CV_PARALLEL_FRAMEWORK "openmp"
++#elif defined HAVE_GCD
++#  define CV_PARALLEL_FRAMEWORK "gcd"
++#elif defined HAVE_CONCURRENCY
++#  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
++#endif
++
+ +namespace cv
+ +{
+ +#ifdef HAVE_TBB
+ +
+ +    typedef tbb::blocked_range<int> BlockedRange;
+ +
+ +    template<typename Body> static inline
+ +    void parallel_for( const BlockedRange& range, const Body& body )
+ +    {
+ +        tbb::parallel_for(range, body);
+ +    }
+ +
+ +    typedef tbb::split Split;
+ +
+ +    template<typename Body> static inline
+ +    void parallel_reduce( const BlockedRange& range, Body& body )
+ +    {
+ +        tbb::parallel_reduce(range, body);
+ +    }
+ +
+ +    typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
+ +#else
+ +    class BlockedRange
+ +    {
+ +    public:
+ +        BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
+ +        BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
+ +        int begin() const { return _begin; }
+ +        int end() const { return _end; }
+ +        int grainsize() const { return _grainsize; }
+ +
+ +    protected:
+ +        int _begin, _end, _grainsize;
+ +    };
+ +
+ +    template<typename Body> static inline
+ +    void parallel_for( const BlockedRange& range, const Body& body )
+ +    {
+ +        body(range);
+ +    }
+ +    typedef std::vector<Rect> ConcurrentRectVector;
+ +
+ +    class Split {};
+ +
+ +    template<typename Body> static inline
+ +    void parallel_reduce( const BlockedRange& range, Body& body )
+ +    {
+ +        body(range);
+ +    }
+ +#endif
+ +} //namespace cv
+ +
+ +#define CV_INIT_ALGORITHM(classname, algname, memberinit) \
+ +    static ::cv::Algorithm* create##classname##_hidden() \
+ +    { \
+ +        return new classname; \
+ +    } \
+ +    \
+ +    static ::cv::AlgorithmInfo& classname##_info() \
+ +    { \
+ +        static ::cv::AlgorithmInfo classname##_info_var(algname, create##classname##_hidden); \
+ +        return classname##_info_var; \
+ +    } \
+ +    \
+ +    static ::cv::AlgorithmInfo& classname##_info_auto = classname##_info(); \
+ +    \
+ +    ::cv::AlgorithmInfo* classname::info() const \
+ +    { \
+ +        static volatile bool initialized = false; \
+ +        \
+ +        if( !initialized ) \
+ +        { \
+ +            initialized = true; \
+ +            classname obj; \
+ +            memberinit; \
+ +        } \
+ +        return &classname##_info(); \
+ +    }
+ +
+ +
+ +
+ +/****************************************************************************************\
+ +*                                  Common declarations                                   *
+ +\****************************************************************************************/
+ +
+ +/* the alignment of all the allocated buffers */
+ +#define  CV_MALLOC_ALIGN    16
+ +
+ +#ifdef __GNUC__
+ +#  define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+ +#elif defined _MSC_VER
+ +#  define CV_DECL_ALIGNED(x) __declspec(align(x))
+ +#else
+ +#  define CV_DECL_ALIGNED(x)
+ +#endif
+ +
+ +/* IEEE754 constants and macros */
+ +#define  CV_TOGGLE_FLT(x) ((x)^((int)(x) < 0 ? 0x7fffffff : 0))
+ +#define  CV_TOGGLE_DBL(x) ((x)^((int64)(x) < 0 ? CV_BIG_INT(0x7fffffffffffffff) : 0))
+ +
+ +static inline void* cvAlignPtr( const void* ptr, int align = 32 )
+ +{
+ +    CV_DbgAssert ( (align & (align-1)) == 0 );
+ +    return (void*)( ((size_t)ptr + align - 1) & ~(size_t)(align-1) );
+ +}
+ +
+ +static inline int cvAlign( int size, int align )
+ +{
+ +    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
+ +    return (size + align - 1) & -align;
+ +}
+ +
+ +#ifdef IPL_DEPTH_8U
+ +static inline cv::Size cvGetMatSize( const CvMat* mat )
+ +{
+ +    return cv::Size(mat->cols, mat->rows);
+ +}
+ +#endif
+ +
+ +namespace cv
+ +{
+ +CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
+ +}
+ +
+ +
+ +/****************************************************************************************\
+ +*                     Structures and macros for integration with IPP                     *
+ +\****************************************************************************************/
+ +
+ +#ifdef HAVE_IPP
+ +#  include "ipp.h"
+ +
+ +// Packs a width/height pair into an IppiSize value.
+ +static inline IppiSize ippiSize(int width, int height)
+ +{
+ +    const IppiSize packed = { width, height };
+ +    return packed;
+ +}
+ +#endif
+ +
+ +#ifndef IPPI_CALL
+ +#  define IPPI_CALL(func) CV_Assert((func) >= 0)
+ +#endif
+ +
+ +/* IPP-compatible return codes.
+ + * Every failure code is negative and CV_NO_ERR / CV_OK are zero, which is
+ + * consistent with the `>= 0` success test used by IPPI_CALL.
+ + * The numbering is not contiguous (e.g. there is no -108). */
+ +typedef enum CvStatus
+ +{
+ +    CV_BADMEMBLOCK_ERR          = -113,
+ +    CV_INPLACE_NOT_SUPPORTED_ERR= -112,
+ +    CV_UNMATCHED_ROI_ERR        = -111,
+ +    CV_NOTFOUND_ERR             = -110,
+ +    CV_BADCONVERGENCE_ERR       = -109,
+ +
+ +    CV_BADDEPTH_ERR             = -107,
+ +    CV_BADROI_ERR               = -106,
+ +    CV_BADHEADER_ERR            = -105,
+ +    CV_UNMATCHED_FORMATS_ERR    = -104,
+ +    CV_UNSUPPORTED_COI_ERR      = -103,
+ +    CV_UNSUPPORTED_CHANNELS_ERR = -102,
+ +    CV_UNSUPPORTED_DEPTH_ERR    = -101,
+ +    CV_UNSUPPORTED_FORMAT_ERR   = -100,
+ +
+ +    CV_BADARG_ERR               = -49,  //ipp comp
+ +    CV_NOTDEFINED_ERR           = -48,  //ipp comp
+ +
+ +    CV_BADCHANNELS_ERR          = -47,  //ipp comp
+ +    CV_BADRANGE_ERR             = -44,  //ipp comp
+ +    CV_BADSTEP_ERR              = -29,  //ipp comp
+ +
+ +    CV_BADFLAG_ERR              =  -12,
+ +    CV_DIV_BY_ZERO_ERR          =  -11, //ipp comp
+ +    CV_BADCOEF_ERR              =  -10,
+ +
+ +    CV_BADFACTOR_ERR            =  -7,
+ +    CV_BADPOINT_ERR             =  -6,
+ +    CV_BADSCALE_ERR             =  -4,
+ +    CV_OUTOFMEM_ERR             =  -3,
+ +    CV_NULLPTR_ERR              =  -2,
+ +    CV_BADSIZE_ERR              =  -1,
+ +    CV_NO_ERR                   =   0,
+ +    CV_OK                       =   CV_NO_ERR   // alias of CV_NO_ERR
+ +}
+ +CvStatus;
+ +
+ +
+ +
+ +/****************************************************************************************\
+ +*                                  Auxiliary algorithms                                  *
+ +\****************************************************************************************/
+ +
+ +namespace cv
+ +{
+ +
+ +// This function splits the input sequence or set into one or more equivalence classes and
+ +// returns the vector of labels - 0-based class indexes for each element.
+ +// predicate(a,b) returns true if the two sequence elements certainly belong to the same class.
+ +//
+ +// The algorithm is described in "Introduction to Algorithms"
+ +// by Cormen, Leiserson and Rivest, the chapter "Data structures for disjoint sets"
+ +template<typename _Tp, class _EqPredicate> int
+ +partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
+ +           _EqPredicate predicate=_EqPredicate())
+ +{
+ +    int i, j, N = (int)_vec.size();
+ +    const _Tp* vec = &_vec[0];
+ +
+ +    const int PARENT=0;
+ +    const int RANK=1;
+ +
+ +    std::vector<int> _nodes(N*2);
+ +    int (*nodes)[2] = (int(*)[2])&_nodes[0];
+ +
+ +    // The first O(N) pass: create N single-vertex trees
+ +    for(i = 0; i < N; i++)
+ +    {
+ +        nodes[i][PARENT]=-1;
+ +        nodes[i][RANK] = 0;
+ +    }
+ +
+ +    // The main O(N^2) pass: merge connected components
+ +    for( i = 0; i < N; i++ )
+ +    {
+ +        int root = i;
+ +
+ +        // find root
+ +        while( nodes[root][PARENT] >= 0 )
+ +            root = nodes[root][PARENT];
+ +
+ +        for( j = 0; j < N; j++ )
+ +        {
+ +            if( i == j || !predicate(vec[i], vec[j]))
+ +                continue;
+ +            int root2 = j;
+ +
+ +            while( nodes[root2][PARENT] >= 0 )
+ +                root2 = nodes[root2][PARENT];
+ +
+ +            if( root2 != root )
+ +            {
+ +                // unite both trees
+ +                int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
+ +                if( rank > rank2 )
+ +                    nodes[root2][PARENT] = root;
+ +                else
+ +                {
+ +                    nodes[root][PARENT] = root2;
+ +                    nodes[root2][RANK] += rank == rank2;
+ +                    root = root2;
+ +                }
+ +                CV_Assert( nodes[root][PARENT] < 0 );
+ +
+ +                int k = j, parent;
+ +
+ +                // compress the path from node2 to root
+ +                while( (parent = nodes[k][PARENT]) >= 0 )
+ +                {
+ +                    nodes[k][PARENT] = root;
+ +                    k = parent;
+ +                }
+ +
+ +                // compress the path from node to root
+ +                k = i;
+ +                while( (parent = nodes[k][PARENT]) >= 0 )
+ +                {
+ +                    nodes[k][PARENT] = root;
+ +                    k = parent;
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    // Final O(N) pass: enumerate classes
+ +    labels.resize(N);
+ +    int nclasses = 0;
+ +
+ +    for( i = 0; i < N; i++ )
+ +    {
+ +        int root = i;
+ +        while( nodes[root][PARENT] >= 0 )
+ +            root = nodes[root][PARENT];
+ +        // re-use the rank as the class label
+ +        if( nodes[root][RANK] >= 0 )
+ +            nodes[root][RANK] = ~nclasses++;
+ +        labels[i] = ~nodes[root][RANK];
+ +    }
+ +
+ +    return nclasses;
+ +}
+ +
+ +} // namespace cv
+ +
+ +#endif // __OPENCV_CORE_PRIVATE_HPP__
diff --cc modules/core/src/matrix.cpp

index 053dd1c,5a3600b..d2032b2
--- 1/modules/core/src/matrix.cpp
--- 2/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@@ -995,9 -980,15 +995,14 @@@ Mat _InputArray::getMat(int i) cons
           return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
       }
   
- -    CV_Assert( k == STD_VECTOR_MAT );
- -    //if( k == STD_VECTOR_MAT )
+     if( k == OCL_MAT )
+     {
+         CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+     }
+ 
+ +    if( k == STD_VECTOR_MAT )
       {
- -        const vector<Mat>& v = *(const vector<Mat>*)obj;
+ +        const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
           CV_Assert( 0 <= i && i < (int)v.size() );
   
           return v[i];
@@@ -1217,19 -1192,24 +1227,24 @@@ Size _InputArray::size(int i) cons
           return buf->size();
       }
   
- -    if( k == OPENGL_TEXTURE )
+ +    if( k == GPU_MAT )
       {
           CV_Assert( i < 0 );
- -        const ogl::Texture2D* tex = (const ogl::Texture2D*)obj;
- -        return tex->size();
+ +        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+ +        return d_mat->size();
       }
   
- -    CV_Assert( k == GPU_MAT );
- -    //if( k == GPU_MAT )
+     if( k == OCL_MAT )
+     {
+         CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+     }
+ 
+ +    CV_Assert( k == CUDA_MEM );
+ +    //if( k == CUDA_MEM )
       {
           CV_Assert( i < 0 );
- -        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
- -        return d_mat->size();
+ +        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+ +        return cuda_mem->size();
       }
   }
   
@@@ -1338,12 -1315,17 +1353,17 @@@ bool _InputArray::empty() cons
       if( k == OPENGL_BUFFER )
           return ((const ogl::Buffer*)obj)->empty();
   
- -    if( k == OPENGL_TEXTURE )
- -        return ((const ogl::Texture2D*)obj)->empty();
- -
+     if( k == OCL_MAT )
+     {
+         CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+     }
+ 
- -    CV_Assert( k == GPU_MAT );
- -    //if( k == GPU_MAT )
+ +    if( k == GPU_MAT )
           return ((const gpu::GpuMat*)obj)->empty();
+ +
+ +    CV_Assert( k == CUDA_MEM );
+ +    //if( k == CUDA_MEM )
+ +        return ((const gpu::CudaMem*)obj)->empty();
   }
   
   
diff --cc modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp

index e993c64,0000000..cb84c23

mode 100644,000000..100644
--- 1/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
--- /dev/null
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp
@@@ -1,1028 -1,0 +1,1028 @@@
- typedef               char Ncv8s;
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                           License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef _ncv_hpp_
+ +#define _ncv_hpp_
+ +
+ +#include "opencv2/core/cvdef.h"
+ +
+ +#ifdef _WIN32
+ +    #define WIN32_LEAN_AND_MEAN
+ +#endif
+ +
+ +#include <cuda_runtime.h>
+ +#include "opencv2/core/cvstd.hpp"
+ +#include "opencv2/core/utility.hpp"
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Compile-time assert functionality
+ +//
+ +//==============================================================================
+ +
+ +
+ +/**
+ +* Compile-time assert namespace
+ +*
+ +* Helper templates used by the NCV_CT_ASSERT macro.
+ +*/
+ +namespace NcvCTprep
+ +{
+ +    // Declared for both values of x but only defined for `true` below, so
+ +    // sizeof(CT_ASSERT_FAILURE<false>) names an incomplete type and fails to compile.
+ +    template <bool x>
+ +    struct CT_ASSERT_FAILURE;
+ +
+ +    template <>
+ +    struct CT_ASSERT_FAILURE<true> {};
+ +
+ +    // Carrier type whose template argument forces the sizeof above to be evaluated.
+ +    template <int x>
+ +    struct assertTest{};
+ +}
+ +
+ +
+ +#define NCV_CT_PREP_PASTE_AUX(a,b)      a##b                         ///< Concatenation indirection macro
+ +#define NCV_CT_PREP_PASTE(a,b)          NCV_CT_PREP_PASTE_AUX(a, b)  ///< Concatenation macro
+ +
+ +
+ +/**
+ +* Performs compile-time assertion of a condition on the file scope
+ +*/
+ +#define NCV_CT_ASSERT(X) \
+ +    typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
+ +    NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
+ +
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Alignment macros
+ +//
+ +//==============================================================================
+ +
+ +
+ +#if !defined(__align__) && !defined(__CUDACC__)
+ +    #if defined(_WIN32) || defined(_WIN64)
+ +        #define __align__(n)         __declspec(align(n))
+ +    #elif defined(__unix__)
+ +        #define __align__(n)         __attribute__((__aligned__(n)))
+ +    #endif
+ +#endif
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Integral and compound types of guaranteed size
+ +//
+ +//==============================================================================
+ +
+ +
+ +typedef               bool NcvBool;
+ +typedef          long long Ncv64s;
+ +
+ +#if defined(__APPLE__) && !defined(__CUDACC__)
+ +    typedef uint64_t Ncv64u;
+ +#else
+ +    typedef unsigned long long Ncv64u;
+ +#endif
+ +
+ +typedef                int Ncv32s;
+ +typedef       unsigned int Ncv32u;
+ +typedef              short Ncv16s;
+ +typedef     unsigned short Ncv16u;
++typedef        signed char Ncv8s;
+ +typedef      unsigned char Ncv8u;
+ +typedef              float Ncv32f;
+ +typedef             double Ncv64f;
+ +
+ +
+ +struct NcvRect8u
+ +{
+ +    Ncv8u x;
+ +    Ncv8u y;
+ +    Ncv8u width;
+ +    Ncv8u height;
+ +    __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {};
+ +    __host__ __device__ NcvRect8u(Ncv8u x_, Ncv8u y_, Ncv8u width_, Ncv8u height_) : x(x_), y(y_), width(width_), height(height_) {}
+ +};
+ +
+ +
+ +struct NcvRect32s
+ +{
+ +    Ncv32s x;          ///< x-coordinate of upper left corner.
+ +    Ncv32s y;          ///< y-coordinate of upper left corner.
+ +    Ncv32s width;      ///< Rectangle width.
+ +    Ncv32s height;     ///< Rectangle height.
+ +    __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {};
+ +    __host__ __device__ NcvRect32s(Ncv32s x_, Ncv32s y_, Ncv32s width_, Ncv32s height_)
+ +        : x(x_), y(y_), width(width_), height(height_) {}
+ +};
+ +
+ +
+ +struct NcvRect32u
+ +{
+ +    Ncv32u x;          ///< x-coordinate of upper left corner.
+ +    Ncv32u y;          ///< y-coordinate of upper left corner.
+ +    Ncv32u width;      ///< Rectangle width.
+ +    Ncv32u height;     ///< Rectangle height.
+ +    __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {};
+ +    __host__ __device__ NcvRect32u(Ncv32u x_, Ncv32u y_, Ncv32u width_, Ncv32u height_)
+ +        : x(x_), y(y_), width(width_), height(height_) {}
+ +};
+ +
+ +
+ +/// Width/height pair stored as 32-bit signed values.
+ +struct NcvSize32s
+ +{
+ +    Ncv32s width;  ///< Width.
+ +    Ncv32s height; ///< Height.
+ +
+ +    /// Zero-initializes both fields.
+ +    __host__ __device__ NcvSize32s()
+ +        : width(0), height(0) {}
+ +
+ +    /// Initializes the fields from the given values.
+ +    __host__ __device__ NcvSize32s(Ncv32s width_, Ncv32s height_)
+ +        : width(width_), height(height_) {}
+ +};
+ +
+ +
+ +struct NcvSize32u
+ +{
+ +    Ncv32u width;  ///< Rectangle width.
+ +    Ncv32u height; ///< Rectangle height.
+ +    __host__ __device__ NcvSize32u() : width(0), height(0) {};
+ +    __host__ __device__ NcvSize32u(Ncv32u width_, Ncv32u height_) : width(width_), height(height_) {}
+ +    __host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
+ +};
+ +
+ +
+ +/// 2D point with 32-bit signed coordinates.
+ +struct NcvPoint2D32s
+ +{
+ +    Ncv32s x; ///< Point X.
+ +    Ncv32s y; ///< Point Y.
+ +
+ +    /// Zero-initializes both coordinates.
+ +    __host__ __device__ NcvPoint2D32s()
+ +        : x(0), y(0) {}
+ +
+ +    /// Initializes the coordinates from the given values.
+ +    __host__ __device__ NcvPoint2D32s(Ncv32s x_, Ncv32s y_)
+ +        : x(x_), y(y_) {}
+ +};
+ +
+ +
+ +/// 2D point with 32-bit unsigned coordinates.
+ +struct NcvPoint2D32u
+ +{
+ +    Ncv32u x; ///< Point X.
+ +    Ncv32u y; ///< Point Y.
+ +
+ +    /// Zero-initializes both coordinates.
+ +    __host__ __device__ NcvPoint2D32u()
+ +        : x(0), y(0) {}
+ +
+ +    /// Initializes the coordinates from the given values.
+ +    __host__ __device__ NcvPoint2D32u(Ncv32u x_, Ncv32u y_)
+ +        : x(x_), y(y_) {}
+ +};
+ +
+ +
+ +NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
+ +NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
+ +NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
+ +NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
+ +NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
+ +NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
+ +NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
+ +NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
+ +NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
+ +NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
+ +NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
+ +NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
+ +NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
+ +NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
+ +NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
+ +NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Persistent constants
+ +//
+ +//==============================================================================
+ +
+ +
+ +const Ncv32u K_WARP_SIZE = 32;
+ +const Ncv32u K_LOG2_WARP_SIZE = 5;
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Error handling
+ +//
+ +//==============================================================================
+ +
+ +
+ +CV_EXPORTS void ncvDebugOutput(const cv::String &msg);
+ +
+ +
+ +typedef void NCVDebugOutputHandler(const cv::String &msg);
+ +
+ +
+ +CV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
+ +
+ +
+ +/**
+ +* Reports a failed check without altering control flow: when `pred` is false,
+ +* formats `msg` together with __FILE__ and __LINE__ and passes the text to
+ +* ncvDebugOutput. `msg` is consumed by a "%s" conversion, so it must be a C string.
+ +*/
+ +#define ncvAssertPrintCheck(pred, msg) \
+ +    do \
+ +    { \
+ +        if (!(pred)) \
+ +        { \
+ +            cv::String str = cv::format("NCV Assertion Failed: %s, file=%s, line=%d", msg, __FILE__, __LINE__); \
+ +            ncvDebugOutput(str); \
+ +        } \
+ +    } while (0)
+ +
+ +
+ +#define ncvAssertPrintReturn(pred, msg, err) \
+ +    do \
+ +    { \
+ +        ncvAssertPrintCheck(pred, msg); \
+ +        if (!(pred)) return err; \
+ +    } while (0)
+ +
+ +
+ +#define ncvAssertReturn(pred, err) \
+ +    do \
+ +    { \
+ +        cv::String msg = cv::format("retcode=%d", (int)err); \
+ +        ncvAssertPrintReturn(pred, msg.c_str(), err); \
+ +    } while (0)
+ +
+ +
+ +#define ncvAssertReturnNcvStat(ncvOp) \
+ +    do \
+ +    { \
+ +        NCVStatus _ncvStat = ncvOp; \
+ +        cv::String msg = cv::format("NcvStat=%d", (int)_ncvStat); \
+ +        ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, msg.c_str(), _ncvStat); \
+ +    } while (0)
+ +
+ +
+ +#define ncvAssertCUDAReturn(cudacall, errCode) \
+ +    do \
+ +    { \
+ +        cudaError_t res = cudacall; \
+ +        cv::String msg = cv::format("cudaError_t=%d", (int)res); \
+ +        ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
+ +    } while (0)
+ +
+ +
+ +#define ncvAssertCUDALastErrorReturn(errCode) \
+ +    do \
+ +    { \
+ +        cudaError_t res = cudaGetLastError(); \
+ +        cv::String msg = cv::format("cudaError_t=%d", (int)res); \
+ +        ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
+ +    } while (0)
+ +
+ +
+ +/**
+ +* Return-codes for status notification, errors and warnings
+ +*
+ +* The enumerators are implicitly numbered from 0 (NCV_SUCCESS). The NPPST_*
+ +* group restarts the numbering at NCV_SUCCESS, so NPPST_* values alias the
+ +* NCV_* values with the same number (e.g. NPPST_ERROR == NCV_UNKNOWN_ERROR == 1).
+ +* NOTE(review): as a consequence NCV_LAST_STATUS evaluates to 14, the same value
+ +* as NCV_ALLOCATOR_BAD_DEALLOC, i.e. it lies below the last NCV_* code. Codes
+ +* continued from it in other files will collide with the NCV_* codes above 13.
+ +*/
+ +enum
+ +{
+ +    //NCV statuses
+ +    NCV_SUCCESS,
+ +    NCV_UNKNOWN_ERROR,
+ +
+ +    NCV_CUDA_ERROR,
+ +    NCV_NPP_ERROR,
+ +    NCV_FILE_ERROR,
+ +
+ +    NCV_NULL_PTR,
+ +    NCV_INCONSISTENT_INPUT,
+ +    NCV_TEXTURE_BIND_ERROR,
+ +    NCV_DIMENSIONS_INVALID,
+ +
+ +    NCV_INVALID_ROI,
+ +    NCV_INVALID_STEP,
+ +    NCV_INVALID_SCALE,
+ +
+ +    NCV_ALLOCATOR_NOT_INITIALIZED,
+ +    NCV_ALLOCATOR_BAD_ALLOC,
+ +    NCV_ALLOCATOR_BAD_DEALLOC,
+ +    NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
+ +    NCV_ALLOCATOR_DEALLOC_ORDER,
+ +    NCV_ALLOCATOR_BAD_REUSE,
+ +
+ +    NCV_MEM_COPY_ERROR,
+ +    NCV_MEM_RESIDENCE_ERROR,
+ +    NCV_MEM_INSUFFICIENT_CAPACITY,
+ +
+ +    NCV_HAAR_INVALID_PIXEL_STEP,
+ +    NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
+ +    NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
+ +    NCV_HAAR_TOO_LARGE_FEATURES,
+ +    NCV_HAAR_XML_LOADING_EXCEPTION,
+ +
+ +    NCV_NOIMPL_HAAR_TILTED_FEATURES,
+ +    NCV_NOT_IMPLEMENTED,
+ +
+ +    NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
+ +
+ +    //NPP statuses (numbering restarts at 0 here)
+ +    NPPST_SUCCESS = NCV_SUCCESS,              ///< Successful operation (same as NPP_NO_ERROR)
+ +    NPPST_ERROR,                              ///< Unknown error
+ +    NPPST_CUDA_KERNEL_EXECUTION_ERROR,        ///< CUDA kernel execution error
+ +    NPPST_NULL_POINTER_ERROR,                 ///< NULL pointer argument error
+ +    NPPST_TEXTURE_BIND_ERROR,                 ///< CUDA texture binding error or non-zero offset returned
+ +    NPPST_MEMCPY_ERROR,                       ///< CUDA memory copy error
+ +    NPPST_MEM_ALLOC_ERR,                      ///< CUDA memory allocation error
+ +    NPPST_MEMFREE_ERR,                        ///< CUDA memory deallocation error
+ +
+ +    //NPPST statuses
+ +    NPPST_INVALID_ROI,                        ///< Invalid region of interest argument
+ +    NPPST_INVALID_STEP,                       ///< Invalid image lines step argument (check sign, alignment, relation to image width)
+ +    NPPST_INVALID_SCALE,                      ///< Invalid scale parameter passed
+ +    NPPST_MEM_INSUFFICIENT_BUFFER,            ///< Insufficient user-allocated buffer
+ +    NPPST_MEM_RESIDENCE_ERROR,                ///< Memory residence error detected (check if pointers should be device or pinned)
+ +    NPPST_MEM_INTERNAL_ERROR,                 ///< Internal memory management error
+ +
+ +    NCV_LAST_STATUS                           ///< Marker to continue error numeration in other files
+ +};
+ +
+ +
+ +typedef Ncv32u NCVStatus;
+ +
+ +
+ +// Declares the local flag `__ncv_skip_cond`, initialized to x, that is
+ +// consulted by NCV_SKIP_COND_BEGIN. Use once per scope.
+ +#define NCV_SET_SKIP_COND(x) \
+ +    bool __ncv_skip_cond = x
+ +
+ +
+ +// Assigns a new value to a flag previously declared with NCV_SET_SKIP_COND.
+ +#define NCV_RESET_SKIP_COND(x) \
+ +    __ncv_skip_cond = x
+ +
+ +
+ +// Opens a block that is executed only while the skip flag is false.
+ +// Must be paired with NCV_SKIP_COND_END.
+ +#define NCV_SKIP_COND_BEGIN \
+ +    if (!__ncv_skip_cond) {
+ +
+ +
+ +// Closes the block opened by NCV_SKIP_COND_BEGIN.
+ +#define NCV_SKIP_COND_END \
+ +    }
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Timer
+ +//
+ +//==============================================================================
+ +
+ +
+ +typedef struct _NcvTimer *NcvTimer;
+ +
+ +CV_EXPORTS NcvTimer ncvStartTimer(void);
+ +
+ +CV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
+ +
+ +CV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
+ +
+ +
+ +//==============================================================================
+ +//
+ +// Memory management classes template compound types
+ +//
+ +//==============================================================================
+ +
+ +
+ +/**
+ +* Calculates the aligned top bound value
+ +*/
+ +CV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
+ +
+ +
+ +/**
+ +* NCVMemoryType
+ +*
+ +* Tag describing where a memory block lives. NCVMemoryTypeNone is the value
+ +* NCVVector/NCVMatrix reset to in clear(), and their copy routines skip the
+ +* actual transfer when the source carries it.
+ +* NOTE(review): the remaining values presumably map to pageable host, pinned
+ +* host and device memory as their names suggest; confirm against the
+ +* allocator implementations.
+ +*/
+ +enum NCVMemoryType
+ +{
+ +    NCVMemoryTypeNone,
+ +    NCVMemoryTypeHostPageable,
+ +    NCVMemoryTypeHostPinned,
+ +    NCVMemoryTypeDevice
+ +};
+ +
+ +
+ +/**
+ +* NCVMemPtr
+ +*
+ +* Raw pointer paired with the memory type it refers to.
+ +*/
+ +struct CV_EXPORTS NCVMemPtr
+ +{
+ +    void *ptr;              // start address of the block
+ +    NCVMemoryType memtype;  // kind of memory `ptr` points into
+ +    void clear();           // defined out of line; presumably resets both fields
+ +};
+ +
+ +
+ +/**
+ +* NCVMemSegment
+ +*
+ +* A typed start pointer plus a size. NCVVectorReuse divides `size` by
+ +* sizeof(T) to obtain an element count, i.e. `size` is measured in bytes.
+ +*/
+ +struct CV_EXPORTS NCVMemSegment
+ +{
+ +    NCVMemPtr begin;  // start of the segment and its memory type
+ +    size_t size;      // segment size in bytes
+ +    void clear();     // defined out of line; presumably resets the segment
+ +};
+ +
+ +
+ +/**
+ +* INCVMemAllocator (Interface)
+ +*
+ +* Abstract allocator interface: every member function is pure virtual. The
+ +* destructor is pure virtual as well and receives its (empty) inline
+ +* definition right after the class.
+ +*/
+ +class CV_EXPORTS INCVMemAllocator
+ +{
+ +public:
+ +    virtual ~INCVMemAllocator() = 0;
+ +
+ +    // Obtain / release a memory segment; the outcome is reported as an NCVStatus.
+ +    virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;
+ +    virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
+ +
+ +    // State queries. NOTE(review): the precise meaning of "counting" is defined
+ +    // by the implementations, which are not visible in this header.
+ +    virtual NcvBool isInitialized(void) const = 0;
+ +    virtual NcvBool isCounting(void) const = 0;
+ +
+ +    // Properties of the memory handed out by this allocator.
+ +    virtual NCVMemoryType memType(void) const = 0;
+ +    virtual Ncv32u alignment(void) const = 0;
+ +    virtual size_t maxSize(void) const = 0;
+ +};
+ +
+ +// A pure virtual destructor still needs a body because derived destructors call it.
+ +inline INCVMemAllocator::~INCVMemAllocator() {}
+ +
+ +
+ +/**
+ +* NCVMemStackAllocator
+ +*
+ +* INCVMemAllocator implementation; member functions are defined out of line.
+ +* NOTE(review): the name and the NCV_ALLOCATOR_DEALLOC_ORDER status suggest
+ +* stack-like (LIFO) allocation; confirm against the implementation.
+ +*/
+ +class CV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
+ +{
+ +    // Default and copy construction are private, i.e. unavailable to users.
+ +    NCVMemStackAllocator();
+ +    NCVMemStackAllocator(const NCVMemStackAllocator &);
+ +
+ +public:
+ +
+ +    // Constructs from an alignment only (no capacity or memory type given).
+ +    explicit NCVMemStackAllocator(Ncv32u alignment);
+ +    // Constructs for a given memory type, capacity and alignment; `reusePtr`
+ +    // optionally supplies an existing buffer (see bReusesMemory).
+ +    NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
+ +    virtual ~NCVMemStackAllocator();
+ +
+ +    virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
+ +    virtual NCVStatus dealloc(NCVMemSegment &seg);
+ +
+ +    virtual NcvBool isInitialized(void) const;
+ +    virtual NcvBool isCounting(void) const;
+ +
+ +    virtual NCVMemoryType memType(void) const;
+ +    virtual Ncv32u alignment(void) const;
+ +    virtual size_t maxSize(void) const;
+ +
+ +private:
+ +
+ +    NCVMemoryType _memType;
+ +    Ncv32u _alignment;
+ +    Ncv8u *allocBegin;
+ +    Ncv8u *begin;
+ +    Ncv8u *end;
+ +    size_t currentSize;
+ +    size_t _maxSize;
+ +    NcvBool bReusesMemory;
+ +};
+ +
+ +
+ +/**
+ +* NCVMemNativeAllocator
+ +*
+ +* INCVMemAllocator implementation configured by a memory type and an
+ +* alignment; member functions are defined out of line.
+ +*/
+ +class CV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
+ +{
+ +public:
+ +
+ +    NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
+ +    virtual ~NCVMemNativeAllocator();
+ +
+ +    virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
+ +    virtual NCVStatus dealloc(NCVMemSegment &seg);
+ +
+ +    virtual NcvBool isInitialized(void) const;
+ +    virtual NcvBool isCounting(void) const;
+ +
+ +    virtual NCVMemoryType memType(void) const;
+ +    virtual Ncv32u alignment(void) const;
+ +    virtual size_t maxSize(void) const;
+ +
+ +private:
+ +
+ +    // Default and copy construction are private, i.e. unavailable to users.
+ +    NCVMemNativeAllocator();
+ +    NCVMemNativeAllocator(const NCVMemNativeAllocator &);
+ +
+ +    NCVMemoryType _memType;
+ +    Ncv32u _alignment;
+ +    size_t currentSize;
+ +    size_t _maxSize;
+ +};
+ +
+ +
+ +/**
+ +* Copy dispatchers
+ +*/
+ +CV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
+ +                                       const void *src, NCVMemoryType srcType,
+ +                                       size_t sz, cudaStream_t cuStream);
+ +
+ +
+ +CV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
+ +                                         const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
+ +                                         Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
+ +
+ +
+ +/**
+ +* NCVVector (1D)
+ +*/
+ +template <class T>
+ +class NCVVector
+ +{
+ +    NCVVector(const NCVVector &);
+ +
+ +public:
+ +
+ +    NCVVector()
+ +    {
+ +        clear();
+ +    }
+ +
+ +    virtual ~NCVVector() {}
+ +
+ +    void clear()
+ +    {
+ +        _ptr = NULL;
+ +        _length = 0;
+ +        _memtype = NCVMemoryTypeNone;
+ +    }
+ +
+ +    NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
+ +    {
+ +        if (howMuch == 0)
+ +        {
+ +            ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
+ +            howMuch = this->_length * sizeof(T);
+ +        }
+ +        else
+ +        {
+ +            ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
+ +                this->_length * sizeof(T) >= howMuch &&
+ +                howMuch > 0, NCV_MEM_COPY_ERROR);
+ +        }
+ +        ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
+ +                        (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
+ +
+ +        NCVStatus ncvStat = NCV_SUCCESS;
+ +        if (this->_memtype != NCVMemoryTypeNone)
+ +        {
+ +            ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
+ +                                       this->_ptr, this->_memtype,
+ +                                       howMuch, cuStream);
+ +        }
+ +
+ +        return ncvStat;
+ +    }
+ +
+ +    T *ptr() const {return this->_ptr;}
+ +    size_t length() const {return this->_length;}
+ +    NCVMemoryType memType() const {return this->_memtype;}
+ +
+ +protected:
+ +
+ +    T *_ptr;
+ +    size_t _length;
+ +    NCVMemoryType _memtype;
+ +};
+ +
+ +
+ +/**
+ +* NCVVectorAlloc
+ +*/
+ +template <class T>
+ +class NCVVectorAlloc : public NCVVector<T>
+ +{
+ +    NCVVectorAlloc();
+ +    NCVVectorAlloc(const NCVVectorAlloc &);
+ +    NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
+ +
+ +public:
+ +
+ +    NCVVectorAlloc(INCVMemAllocator &allocator_, Ncv32u length_)
+ +        :
+ +        allocator(allocator_)
+ +    {
+ +        NCVStatus ncvStat;
+ +
+ +        this->clear();
+ +        this->allocatedMem.clear();
+ +
+ +        ncvStat = allocator.alloc(this->allocatedMem, length_ * sizeof(T));
+ +        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );
+ +
+ +        this->_ptr = (T *)this->allocatedMem.begin.ptr;
+ +        this->_length = length_;
+ +        this->_memtype = this->allocatedMem.begin.memtype;
+ +    }
+ +
+ +    ~NCVVectorAlloc()
+ +    {
+ +        NCVStatus ncvStat;
+ +
+ +        ncvStat = allocator.dealloc(this->allocatedMem);
+ +        ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
+ +
+ +        this->clear();
+ +    }
+ +
+ +    NcvBool isMemAllocated() const
+ +    {
+ +        return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
+ +    }
+ +
+ +    Ncv32u getAllocatorsAlignment() const
+ +    {
+ +        return allocator.alignment();
+ +    }
+ +
+ +    NCVMemSegment getSegment() const
+ +    {
+ +        return allocatedMem;
+ +    }
+ +
+ +private:
+ +    INCVMemAllocator &allocator;
+ +    NCVMemSegment allocatedMem;
+ +};
+ +
+ +
+ +/**
+ +* NCVVectorReuse
+ +*/
+ +template <class T>
+ +class NCVVectorReuse : public NCVVector<T>
+ +{
+ +    NCVVectorReuse();
+ +    NCVVectorReuse(const NCVVectorReuse &);
+ +
+ +public:
+ +
+ +    explicit NCVVectorReuse(const NCVMemSegment &memSegment)
+ +    {
+ +        this->bReused = false;
+ +        this->clear();
+ +
+ +        this->_length = memSegment.size / sizeof(T);
+ +        this->_ptr = (T *)memSegment.begin.ptr;
+ +        this->_memtype = memSegment.begin.memtype;
+ +
+ +        this->bReused = true;
+ +    }
+ +
+ +    NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length_)
+ +    {
+ +        this->bReused = false;
+ +        this->clear();
+ +
+ +        ncvAssertPrintReturn(length_ * sizeof(T) <= memSegment.size, \
+ +            "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
+ +
+ +        this->_length = length_;
+ +        this->_ptr = (T *)memSegment.begin.ptr;
+ +        this->_memtype = memSegment.begin.memtype;
+ +
+ +        this->bReused = true;
+ +    }
+ +
+ +    NcvBool isMemReused() const
+ +    {
+ +        return this->bReused;
+ +    }
+ +
+ +private:
+ +
+ +    NcvBool bReused;
+ +};
+ +
+ +
+ +/**
+ +* NCVMatrix (2D)
+ +*
+ +* Descriptor of a pitched 2D array of T: base pointer, width and height in
+ +* elements, row pitch in bytes and a memory type tag. The class itself never
+ +* allocates or releases the memory behind _ptr; the fields are protected so
+ +* that derived classes can fill them in.
+ +*/
+ +template <class T>
+ +class NCVMatrix
+ +{
+ +    //declared private and not defined here: copy construction is not allowed
+ +    NCVMatrix(const NCVMatrix &);
+ +
+ +public:
+ +
+ +    //creates an empty descriptor (see clear())
+ +    NCVMatrix()
+ +    {
+ +        clear();
+ +    }
+ +
+ +    virtual ~NCVMatrix() {}
+ +
+ +    //resets the descriptor: NULL pointer, zero pitch/width/height, NCVMemoryTypeNone
+ +    void clear()
+ +    {
+ +        _ptr = NULL;
+ +        _pitch = 0;
+ +        _width = 0;
+ +        _height = 0;
+ +        _memtype = NCVMemoryTypeNone;
+ +    }
+ +
+ +    //row pitch expressed in elements of T (byte pitch divided by sizeof(T))
+ +    Ncv32u stride() const
+ +    {
+ +        return _pitch / sizeof(T);
+ +    }
+ +
+ +    //copies howMuch bytes from this matrix into dst through memSegCopyHelper;
+ +    //howMuch == 0 means "the whole matrix" (pitch * height bytes), in which case
+ +    //dst must have the same pitch and height as this matrix
+ +    //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten
+ +    NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
+ +    {
+ +        //widen before multiplying: pitch and height are both Ncv32u, so the product
+ +        //would otherwise be evaluated in that narrower type and could wrap around
+ +        //for very large buffers before being converted to size_t
+ +        const size_t srcBytes = static_cast<size_t>(this->_pitch) * this->_height;
+ +        const size_t dstBytes = static_cast<size_t>(dst._pitch) * dst._height;
+ +
+ +        if (howMuch == 0)
+ +        {
+ +            ncvAssertReturn(dst._pitch == this->_pitch &&
+ +                            dst._height == this->_height, NCV_MEM_COPY_ERROR);
+ +            howMuch = srcBytes;
+ +        }
+ +        else
+ +        {
+ +            ncvAssertReturn(dstBytes >= howMuch &&
+ +                            srcBytes >= howMuch &&
+ +                            howMuch > 0, NCV_MEM_COPY_ERROR);
+ +        }
+ +        //a NULL pointer is tolerated only for descriptors whose memory type is None
+ +        ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
+ +                        (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
+ +
+ +        NCVStatus ncvStat = NCV_SUCCESS;
+ +        //nothing is copied when the source has no memory type
+ +        if (this->_memtype != NCVMemoryTypeNone)
+ +        {
+ +            ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
+ +                                       this->_ptr, this->_memtype,
+ +                                       howMuch, cuStream);
+ +        }
+ +
+ +        return ncvStat;
+ +    }
+ +
+ +    //copies a roi.width x roi.height block of elements, taken from the origin of
+ +    //this matrix, to the origin of dst through memSegCopyHelper2D, honouring the
+ +    //pitch of each matrix; both matrices must be at least as large as roi
+ +    NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const
+ +    {
+ +        ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height &&
+ +                        dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR);
+ +        ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
+ +                        (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
+ +
+ +        NCVStatus ncvStat = NCV_SUCCESS;
+ +        if (this->_memtype != NCVMemoryTypeNone)
+ +        {
+ +            ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype,
+ +                                         this->_ptr, this->_pitch, this->_memtype,
+ +                                         roi.width * sizeof(T), roi.height, cuStream);
+ +        }
+ +
+ +        return ncvStat;
+ +    }
+ +
+ +    //reference to element (x, y); an out-of-range index is reported through
+ +    //ncvAssertPrintCheck and the first element (*_ptr) is returned instead
+ +    T& at(Ncv32u x, Ncv32u y) const
+ +    {
+ +        NcvBool bOutRange = (x >= this->_width || y >= this->_height);
+ +        ncvAssertPrintCheck(!bOutRange, "Error addressing matrix");
+ +        if (bOutRange)
+ +        {
+ +            return *this->_ptr;
+ +        }
+ +        //rows are _pitch bytes apart, hence the byte-wise row offset
+ +        return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
+ +    }
+ +
+ +    T *ptr() const {return this->_ptr;}
+ +    Ncv32u width() const {return this->_width;}
+ +    Ncv32u height() const {return this->_height;}
+ +    NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);}
+ +    Ncv32u pitch() const {return this->_pitch;}
+ +    NCVMemoryType memType() const {return this->_memtype;}
+ +
+ +protected:
+ +
+ +    T *_ptr;                //first element
+ +    Ncv32u _width;          //in elements
+ +    Ncv32u _height;         //in rows
+ +    Ncv32u _pitch;          //distance between rows, in bytes
+ +    NCVMemoryType _memtype;
+ +};
+ +
+ +
+ +/**
+ +* NCVMatrixAlloc
+ +*
+ +* NCVMatrix whose storage is requested from an INCVMemAllocator in the
+ +* constructor and handed back to the same allocator in the destructor.
+ +* Default construction, copying and assignment are disabled.
+ +*/
+ +template <class T>
+ +class NCVMatrixAlloc : public NCVMatrix<T>
+ +{
+ +    NCVMatrixAlloc();
+ +    NCVMatrixAlloc(const NCVMatrixAlloc &);
+ +    NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
+ +public:
+ +
+ +    //allocates width_ x height_ elements; pitch_ == 0 selects the row size rounded
+ +    //up to the allocator's alignment, otherwise pitch_ (in bytes) must be at least
+ +    //that value and a multiple of the alignment. On any failure the constructor
+ +    //returns early and the matrix is left in the cleared state.
+ +    NCVMatrixAlloc(INCVMemAllocator &allocator_, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0)
+ +        : allocator(allocator_)
+ +    {
+ +        this->clear();
+ +        this->allocatedMem.clear();
+ +
+ +        Ncv32u rowBytes = width_ * sizeof(T);
+ +        Ncv32u rowPitch = alignUp(rowBytes, allocator.alignment());
+ +
+ +        if (pitch_ != 0)
+ +        {
+ +            ncvAssertPrintReturn(pitch_ >= rowPitch &&
+ +                (pitch_ & (allocator.alignment() - 1)) == 0,
+ +                "NCVMatrixAlloc ctor:: incorrect pitch passed", );
+ +            rowPitch = pitch_;
+ +        }
+ +
+ +        Ncv32u totalBytes = rowPitch * height_;
+ +
+ +        NCVStatus allocStat = allocator.alloc(this->allocatedMem, totalBytes);
+ +        ncvAssertPrintReturn(allocStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );
+ +
+ +        //publish the freshly allocated segment through the base-class fields
+ +        this->_memtype = this->allocatedMem.begin.memtype;
+ +        this->_ptr = (T *)this->allocatedMem.begin.ptr;
+ +        this->_pitch = rowPitch;
+ +        this->_width = width_;
+ +        this->_height = height_;
+ +    }
+ +
+ +    //returns the segment to the allocator; a failure is only reported, not propagated
+ +    ~NCVMatrixAlloc()
+ +    {
+ +        NCVStatus deallocStat = allocator.dealloc(this->allocatedMem);
+ +        ncvAssertPrintCheck(deallocStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");
+ +
+ +        this->clear();
+ +    }
+ +
+ +    //true when a segment pointer is held, or when the allocator reports isCounting()
+ +    NcvBool isMemAllocated() const
+ +    {
+ +        return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
+ +    }
+ +
+ +    Ncv32u getAllocatorsAlignment() const
+ +    {
+ +        return allocator.alignment();
+ +    }
+ +
+ +    //copy of the segment descriptor backing this matrix
+ +    NCVMemSegment getSegment() const
+ +    {
+ +        return allocatedMem;
+ +    }
+ +
+ +private:
+ +
+ +    INCVMemAllocator &allocator;
+ +    NCVMemSegment allocatedMem;
+ +};
+ +
+ +
+ +/**
+ +* NCVMatrixReuse
+ +*
+ +* NCVMatrix view over memory owned by something else: either an existing
+ +* memory segment or a rectangular region of another matrix. Nothing is
+ +* allocated or released here. isMemReused() tells whether the binding
+ +* succeeded; on failure the matrix stays in the cleared state.
+ +*/
+ +template <class T>
+ +class NCVMatrixReuse : public NCVMatrix<T>
+ +{
+ +    NCVMatrixReuse();
+ +    NCVMatrixReuse(const NCVMatrixReuse &);
+ +
+ +public:
+ +
+ +    //binds a width_ x height_ matrix to memSegment. pitch_ == 0 selects the row size
+ +    //rounded up to alignment; otherwise pitch_ (in bytes) must be at least that value
+ +    //and a multiple of alignment, or - with bSkipPitchCheck - merely at least one row
+ +    //of elements. The segment must be large enough for pitch * height_ bytes.
+ +    NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0, NcvBool bSkipPitchCheck=false)
+ +    {
+ +        this->bReused = false;
+ +        this->clear();
+ +
+ +        Ncv32u widthBytes = width_ * sizeof(T);
+ +        Ncv32u pitchBytes = alignUp(widthBytes, alignment);
+ +
+ +        if (pitch_ != 0)
+ +        {
+ +            if (!bSkipPitchCheck)
+ +            {
+ +                ncvAssertPrintReturn(pitch_ >= pitchBytes &&
+ +                    (pitch_ & (alignment - 1)) == 0,
+ +                    "NCVMatrixReuse ctor:: incorrect pitch passed", );
+ +            }
+ +            else
+ +            {
+ +                ncvAssertPrintReturn(pitch_ >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
+ +            }
+ +            pitchBytes = pitch_;
+ +        }
+ +
+ +        //widen before multiplying so that the required size cannot wrap around in
+ +        //Ncv32u arithmetic and slip past the segment size check
+ +        ncvAssertPrintReturn(static_cast<size_t>(pitchBytes) * height_ <= memSegment.size,
+ +            "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
+ +
+ +        this->_width = width_;
+ +        this->_height = height_;
+ +        this->_pitch = pitchBytes;
+ +        this->_ptr = (T *)memSegment.begin.ptr;
+ +        this->_memtype = memSegment.begin.memtype;
+ +
+ +        this->bReused = true;
+ +    }
+ +
+ +    //binds to the roi region of mat, sharing its memory, pitch and memory type;
+ +    //roi must start inside mat and must not extend past its right/bottom edge
+ +    NCVMatrixReuse(const NCVMatrix<T> &mat, NcvRect32u roi)
+ +    {
+ +        this->bReused = false;
+ +        this->clear();
+ +
+ +        //the extent is compared against the space remaining after the origin
+ +        //(rather than testing roi.x + roi.width) so that an oversized roi cannot
+ +        //wrap around in unsigned arithmetic and pass the check; the subtractions
+ +        //are safe because the origin tests come first and short-circuit
+ +        ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() &&
+ +            roi.width <= mat.width() - roi.x && roi.height <= mat.height() - roi.y,
+ +            "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
+ +
+ +        this->_width = roi.width;
+ +        this->_height = roi.height;
+ +        this->_pitch = mat.pitch();
+ +        this->_ptr = &mat.at(roi.x, roi.y);
+ +        this->_memtype = mat.memType();
+ +
+ +        this->bReused = true;
+ +    }
+ +
+ +    //true when the constructor successfully bound the matrix to existing memory
+ +    NcvBool isMemReused() const
+ +    {
+ +        return this->bReused;
+ +    }
+ +
+ +private:
+ +
+ +    NcvBool bReused;
+ +};
+ +
+ +
+ +/**
+ +* Operations with rectangles
+ +*/
+ +CV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
+ +                                              Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
+ +
+ +
+ +CV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+ +                                           NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
+ +
+ +
+ +CV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+ +                                            NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
+ +
+ +
+ +CV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+ +                                             NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
+ +
+ +
+ +CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
+ +                                              NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
+ +
+ +
+ +// Range-limiting helpers. These are plain text-substitution macros: an argument
+ +// can be evaluated more than once, so do not pass expressions with side effects.
+ +// CLAMP limits x to [a, b]; CLAMP_TOP caps x at a; CLAMP_BOTTOM raises x to at least a.
+ +#define CLAMP(x,a,b)        ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
+ +#define CLAMP_TOP(x, a)     (((x) > (a)) ? (a) : (x))
+ +#define CLAMP_BOTTOM(x, a)  (((x) < (a)) ? (a) : (x))
+ +#define CLAMP_0_255(x)      CLAMP(x,0,255)
+ +
+ +
+ +// SUB_BEGIN/SUB_END wrap a function definition in an unnamed struct and declare an
+ +// object of that struct named after the function; SUB_CALL(name) then expands to
+ +// name.name, i.e. a call of that member through the object.
+ +#define SUB_BEGIN(type, name)    struct { __inline type name
+ +#define SUB_END(name)            } name;
+ +#define SUB_CALL(name)           name.name
+ +
+ +// Square of x; x is evaluated twice.
+ +#define SQR(x)              ((x)*(x))
+ +
+ +
+ +// Declares NCVMatrixAlloc<type> name(alloc, width, height) and passes err to
+ +// ncvAssertReturn if name.isMemAllocated() is false. Expands to two statements
+ +// (a declaration and a check), so it must not be used as the sole body of an
+ +// unbraced if/else/loop.
+ +#define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
+ +    NCVMatrixAlloc<type> name(alloc, width, height); \
+ +    ncvAssertReturn(name.isMemAllocated(), err);
+ +
+ +
+ +
+ +#endif // _ncv_hpp_
diff --cc modules/gpulegacy/src/cuda/NCVPixelOperations.hpp

index 6409fab,0000000..5cf902a

mode 100644,000000..100644
--- 1/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
--- /dev/null
+++ b/modules/gpulegacy/src/cuda/NCVPixelOperations.hpp
@@@ -1,351 -1,0 +1,351 @@@
- template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  CHAR_MAX;}
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                           License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef _ncv_pixel_operations_hpp_
+ +#define _ncv_pixel_operations_hpp_
+ +
+ +#include <limits.h>
+ +#include <float.h>
+ +#include "opencv2/gpulegacy/NCV.hpp"
+ +
+ +// Largest value of each supported base pixel type, taken from <limits.h> / <float.h>.
+ +// The primary template is declared but not defined, so only the types specialized
+ +// below can be used.
+ +// The explicit specializations carry no storage-class specifier: C++ does not allow
+ +// one (such as static) on an explicit specialization, and strict compilers reject it.
+ +template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
+ +template<> inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
+ +template<> inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
+ +template<> inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
- template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return CHAR_MIN;}
++template<> inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  SCHAR_MAX;}
+ +template<> inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
+ +template<> inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
+ +template<> inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
+ +template<> inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return   DBL_MAX;}
+ +
+ +// Lowest value of each supported base pixel type. The primary template is declared
+ +// but not defined, so only the types specialized below can be used.
+ +// The explicit specializations carry no storage-class specifier: C++ does not allow
+ +// one (such as static) on an explicit specialization, and strict compilers reject it.
+ +// For the floating-point types the lowest value is -FLT_MAX / -DBL_MAX; FLT_MIN and
+ +// DBL_MIN are the smallest *positive* normalized values and would not be a lower
+ +// bound, unlike SCHAR_MIN / SHRT_MIN / INT_MIN for the signed integer types.
+ +template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
+ +template<> inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
+ +template<> inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
+ +template<> inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
++template<> inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return SCHAR_MIN;}
+ +template<> inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
+ +template<> inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
+ +template<> inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return -FLT_MAX;}
+ +template<> inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return -DBL_MAX;}
+ +
+ +// Maps a 1-, 3- or 4-component vector pixel type to the type of one component
+ +// (TBase). Only the vector types listed here are supported; the primary template
+ +// is left undefined.
+ +template<typename Tvec> struct TConvVec2Base;
+ +template<> struct TConvVec2Base<uchar1>  {typedef Ncv8u TBase;};
+ +template<> struct TConvVec2Base<uchar3>  {typedef Ncv8u TBase;};
+ +template<> struct TConvVec2Base<uchar4>  {typedef Ncv8u TBase;};
+ +template<> struct TConvVec2Base<ushort1> {typedef Ncv16u TBase;};
+ +template<> struct TConvVec2Base<ushort3> {typedef Ncv16u TBase;};
+ +template<> struct TConvVec2Base<ushort4> {typedef Ncv16u TBase;};
+ +template<> struct TConvVec2Base<uint1>   {typedef Ncv32u TBase;};
+ +template<> struct TConvVec2Base<uint3>   {typedef Ncv32u TBase;};
+ +template<> struct TConvVec2Base<uint4>   {typedef Ncv32u TBase;};
+ +template<> struct TConvVec2Base<float1>  {typedef Ncv32f TBase;};
+ +template<> struct TConvVec2Base<float3>  {typedef Ncv32f TBase;};
+ +template<> struct TConvVec2Base<float4>  {typedef Ncv32f TBase;};
+ +template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
+ +template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
+ +template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};
+ +
+ +// Number of channels of vector type T, computed as sizeof(T) / sizeof(component).
+ +// NOTE(review): this file also uses NC() with a template parameter as T; there
+ +// TConvVec2Base<T>::TBase is a dependent type named without `typename`, which
+ +// stricter compilers may reject - confirm on the supported toolchains.
+ +#define NC(T)       (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
+ +
+ +// Inverse of TConvVec2Base: maps a component type plus a channel count (1, 3 or 4)
+ +// to the corresponding vector pixel type (TVec). The primary template is left
+ +// undefined, so other combinations cannot be used.
+ +template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
+ +template<> struct TConvBase2Vec<Ncv8u, 1>  {typedef uchar1 TVec;};
+ +template<> struct TConvBase2Vec<Ncv8u, 3>  {typedef uchar3 TVec;};
+ +template<> struct TConvBase2Vec<Ncv8u, 4>  {typedef uchar4 TVec;};
+ +template<> struct TConvBase2Vec<Ncv16u, 1> {typedef ushort1 TVec;};
+ +template<> struct TConvBase2Vec<Ncv16u, 3> {typedef ushort3 TVec;};
+ +template<> struct TConvBase2Vec<Ncv16u, 4> {typedef ushort4 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32u, 1> {typedef uint1 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32u, 3> {typedef uint3 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32u, 4> {typedef uint4 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32f, 1> {typedef float1 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32f, 3> {typedef float3 TVec;};
+ +template<> struct TConvBase2Vec<Ncv32f, 4> {typedef float4 TVec;};
+ +template<> struct TConvBase2Vec<Ncv64f, 1> {typedef double1 TVec;};
+ +template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
+ +template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
+ +
+ +//TODO: consider using CUDA intrinsics to avoid branching
+ +// Converts a to the type of out. For the unsigned integer targets the value is
+ +// first clamped to [0, max of the target type] and then converted with a plain
+ +// cast (no rounding offset is added); the Ncv32f overload converts without clamping.
+ +// The overload is selected by the type of out.
+ +template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);}
+ +template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
+ +template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
+ +template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
+ +
+ +//TODO: consider using CUDA intrinsics to avoid branching
+ +// Converts a to the type of out. For the unsigned integer targets 0.5f is added
+ +// before clamping to [0, max of the target type] and casting; the Ncv32f overload
+ +// converts the value unchanged. The overload is selected by the type of out.
+ +template<typename Tin>
+ +static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out)
+ +{
+ +    out = (Ncv8u)CLAMP_0_255(a+0.5f);
+ +}
+ +
+ +template<typename Tin>
+ +static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out)
+ +{
+ +    out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);
+ +}
+ +
+ +template<typename Tin>
+ +static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out)
+ +{
+ +    out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);
+ +}
+ +
+ +template<typename Tin>
+ +static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out)
+ +{
+ +    out = (Ncv32f)a;
+ +}
+ +
+ +// Returns a pixel of vector type Tout with every component set to zero. The primary
+ +// template is declared but not defined, so only the vector types specialized below
+ +// can be used.
+ +// The explicit specializations carry no storage-class specifier: C++ does not allow
+ +// one (such as static) on an explicit specialization, and strict compilers reject it.
+ +template<typename Tout> inline Tout _pixMakeZero();
+ +template<> inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
+ +template<> inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
+ +template<> inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
+ +template<> inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
+ +template<> inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
+ +template<> inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
+ +template<> inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
+ +template<> inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
+ +template<> inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
+ +template<> inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
+ +template<> inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
+ +template<> inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
+ +template<> inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
+ +template<> inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
+ +template<> inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
+ +
+ +// Overloaded constructors for vector pixels: the component type and the number of
+ +// arguments (1, 3 or 4) select which make_* function is called.
+ +static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
+ +static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
+ +static inline __host__ __device__ uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
+ +static inline __host__ __device__ ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
+ +static inline __host__ __device__ ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
+ +static inline __host__ __device__ ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
+ +static inline __host__ __device__ uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
+ +static inline __host__ __device__ uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
+ +static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
+ +static inline __host__ __device__ float1 _pixMake(Ncv32f x) {return make_float1(x);}
+ +static inline __host__ __device__ float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
+ +static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
+ +static inline __host__ __device__ double1 _pixMake(Ncv64f x) {return make_double1(x);}
+ +static inline __host__ __device__ double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
+ +static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
+ +
+ +
+ +// Per-channel-count implementation of _pixDemoteClampZ. The generic template only
+ +// declares the function; definitions exist for 1, 3 and 4 channels.
+ +template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix);
+ +};
+ +
+ +// 1 channel
+ +template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampZ(pix.x, demoted.x);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// 3 channels
+ +template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampZ(pix.x, demoted.x);
+ +        _TDemoteClampZ(pix.y, demoted.y);
+ +        _TDemoteClampZ(pix.z, demoted.z);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// 4 channels
+ +template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampZ(pix.x, demoted.x);
+ +        _TDemoteClampZ(pix.y, demoted.y);
+ +        _TDemoteClampZ(pix.z, demoted.z);
+ +        _TDemoteClampZ(pix.w, demoted.w);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// Converts every component of pix with _TDemoteClampZ and returns the result as
+ +// Tout; the implementation is chosen by the channel count of Tin.
+ +template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix)
+ +{
+ +    typedef __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)> Impl;
+ +    return Impl::_pixDemoteClampZ_CN(pix);
+ +}
+ +
+ +
+ +// Per-channel-count implementation of _pixDemoteClampNN. The generic template only
+ +// declares the function; definitions exist for 1, 3 and 4 channels.
+ +template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix);
+ +};
+ +
+ +// 1 channel
+ +template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampNN(pix.x, demoted.x);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// 3 channels
+ +template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampNN(pix.x, demoted.x);
+ +        _TDemoteClampNN(pix.y, demoted.y);
+ +        _TDemoteClampNN(pix.z, demoted.z);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// 4 channels
+ +template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4>
+ +{
+ +    static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+ +    {
+ +        Tout demoted;
+ +        _TDemoteClampNN(pix.x, demoted.x);
+ +        _TDemoteClampNN(pix.y, demoted.y);
+ +        _TDemoteClampNN(pix.z, demoted.z);
+ +        _TDemoteClampNN(pix.w, demoted.w);
+ +        return demoted;
+ +    }
+ +};
+ +
+ +// Converts every component of pix with _TDemoteClampNN and returns the result as
+ +// Tout; the implementation is chosen by the channel count of Tin.
+ +template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix)
+ +{
+ +    typedef __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)> Impl;
+ +    return Impl::_pixDemoteClampNN_CN(pix);
+ +}
+ +
+ +
+ +// Per-channel-count implementation of _pixScale. The generic template only declares
+ +// the function; definitions exist for 1, 3 and 4 channels.
+ +template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN
+ +{
+ +    static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w);
+ +};
+ +
+ +// 1 channel
+ +template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1>
+ +{
+ +    static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+ +    {
+ +        typedef typename TConvVec2Base<Tout>::TBase TOutBase;
+ +        Tout scaled;
+ +        scaled.x = (TOutBase)(pix.x * w);
+ +        return scaled;
+ +    }
+ +};
+ +
+ +// 3 channels
+ +template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3>
+ +{
+ +    static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+ +    {
+ +        typedef typename TConvVec2Base<Tout>::TBase TOutBase;
+ +        Tout scaled;
+ +        scaled.x = (TOutBase)(pix.x * w);
+ +        scaled.y = (TOutBase)(pix.y * w);
+ +        scaled.z = (TOutBase)(pix.z * w);
+ +        return scaled;
+ +    }
+ +};
+ +
+ +// 4 channels
+ +template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4>
+ +{
+ +    static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+ +    {
+ +        typedef typename TConvVec2Base<Tout>::TBase TOutBase;
+ +        Tout scaled;
+ +        scaled.x = (TOutBase)(pix.x * w);
+ +        scaled.y = (TOutBase)(pix.y * w);
+ +        scaled.z = (TOutBase)(pix.z * w);
+ +        scaled.w = (TOutBase)(pix.w * w);
+ +        return scaled;
+ +    }
+ +};
+ +
+ +// Multiplies every component of pix by the weight w and casts each product to the
+ +// component type of Tout; the implementation is chosen by the channel count of Tin.
+ +template<typename Tin, typename Tout, typename Tw> static __host__ __device__ Tout _pixScale(Tin &pix, Tw w)
+ +{
+ +    typedef __pixScale_CN<Tin, Tout, Tw, NC(Tin)> Impl;
+ +    return Impl::_pixScale_CN(pix, w);
+ +}
+ +
+ +
+ +// Per-channel-count implementation of _pixAdd. The generic template only declares
+ +// the function; definitions exist for 1, 3 and 4 channels.
+ +template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN
+ +{
+ +    static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2);
+ +};
+ +
+ +// 1 channel
+ +template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1>
+ +{
+ +    static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+ +    {
+ +        Tout sum;
+ +        sum.x = pix1.x + pix2.x;
+ +        return sum;
+ +    }
+ +};
+ +
+ +// 3 channels
+ +template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3>
+ +{
+ +    static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+ +    {
+ +        Tout sum;
+ +        sum.x = pix1.x + pix2.x;
+ +        sum.y = pix1.y + pix2.y;
+ +        sum.z = pix1.z + pix2.z;
+ +        return sum;
+ +    }
+ +};
+ +
+ +// 4 channels
+ +template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4>
+ +{
+ +    static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+ +    {
+ +        Tout sum;
+ +        sum.x = pix1.x + pix2.x;
+ +        sum.y = pix1.y + pix2.y;
+ +        sum.z = pix1.z + pix2.z;
+ +        sum.w = pix1.w + pix2.w;
+ +        return sum;
+ +    }
+ +};
+ +
+ +// Component-wise sum of pix1 (of the output type) and pix2 (of the input type),
+ +// returned as Tout; the implementation is chosen by the channel count of Tin.
+ +template<typename Tin, typename Tout> static __host__ __device__ Tout _pixAdd(Tout &pix1, Tin &pix2)
+ +{
+ +    typedef __pixAdd_CN<Tin, Tout, NC(Tin)> Impl;
+ +    return Impl::_pixAdd_CN(pix1, pix2);
+ +}
+ +
+ +
+ +// Per-channel-count implementation of _pixDist. The generic template only declares
+ +// the function; definitions exist for 1, 3 and 4 channels.
+ +template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN
+ +{
+ +    static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2);
+ +};
+ +
+ +// 1 channel
+ +template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1>
+ +{
+ +    static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+ +    {
+ +        return Tout(SQR(pix1.x - pix2.x));
+ +    }
+ +};
+ +
+ +// 3 channels
+ +template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3>
+ +{
+ +    static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+ +    {
+ +        return Tout(SQR(pix1.x - pix2.x) +
+ +                    SQR(pix1.y - pix2.y) +
+ +                    SQR(pix1.z - pix2.z));
+ +    }
+ +};
+ +
+ +// 4 channels
+ +template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4>
+ +{
+ +    static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+ +    {
+ +        return Tout(SQR(pix1.x - pix2.x) +
+ +                    SQR(pix1.y - pix2.y) +
+ +                    SQR(pix1.z - pix2.z) +
+ +                    SQR(pix1.w - pix2.w));
+ +    }
+ +};
+ +
+ +// Sum of squared component differences between pix1 and pix2 (no square root is
+ +// taken), converted to Tout; the implementation is chosen by the channel count of Tin.
+ +template<typename Tin, typename Tout> static __host__ __device__ Tout _pixDist(Tin &pix1, Tin &pix2)
+ +{
+ +    typedef __pixDist_CN<Tin, Tout, NC(Tin)> Impl;
+ +    return Impl::_pixDist_CN(pix1, pix2);
+ +}
+ +
+ +
+ +// Maps a pixel type to the accumulator pixel type named `type`: a double vector
+ +// with the same number of channels. Defined for the uchar, ushort and float
+ +// vector types only; the primary template is left undefined.
+ +template <typename T> struct TAccPixWeighted;
+ +template<> struct TAccPixWeighted<uchar1> {typedef double1 type;};
+ +template<> struct TAccPixWeighted<uchar3> {typedef double3 type;};
+ +template<> struct TAccPixWeighted<uchar4> {typedef double4 type;};
+ +template<> struct TAccPixWeighted<ushort1> {typedef double1 type;};
+ +template<> struct TAccPixWeighted<ushort3> {typedef double3 type;};
+ +template<> struct TAccPixWeighted<ushort4> {typedef double4 type;};
+ +template<> struct TAccPixWeighted<float1> {typedef double1 type;};
+ +template<> struct TAccPixWeighted<float3> {typedef double3 type;};
+ +template<> struct TAccPixWeighted<float4> {typedef double4 type;};
+ +
+ +// Maps a pixel type to the scalar type named `type`: Ncv32u for the uchar and
+ +// ushort vector types, Ncv32f for the float vector types. The primary template is
+ +// an empty struct, so for any other pixel type `type` simply does not exist.
+ +template<typename Tfrom> struct TAccPixDist {};
+ +template<> struct TAccPixDist<uchar1> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<uchar3> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<uchar4> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<ushort1> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<ushort3> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<ushort4> {typedef Ncv32u type;};
+ +template<> struct TAccPixDist<float1> {typedef Ncv32f type;};
+ +template<> struct TAccPixDist<float3> {typedef Ncv32f type;};
+ +template<> struct TAccPixDist<float4> {typedef Ncv32f type;};
+ +
+ +#endif //_ncv_pixel_operations_hpp_
diff --cc modules/highgui/include/opencv2/highgui/cap_ios.h
Simple merge
diff --cc modules/highgui/src/window_QT.cpp
Simple merge
diff --cc modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java
Simple merge
diff --cc modules/nonfree/test/test_features2d.cpp

index 09997ab,4cce77b..bff8a38
--- 1/modules/nonfree/test/test_features2d.cpp
--- 2/modules/nonfree/test/test_features2d.cpp
+++ b/modules/nonfree/test/test_features2d.cpp
@@@ -1149,3 -1145,77 +1149,76 @@@ protected
   
   TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); }
   TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); }
- -
+ 
+ // Checks that a feature detector honours the detection mask. The test image is
+ // split into an nStepX x nStepY grid; each cell in turn is the only non-zero
+ // region of the mask, and every keypoint returned must lie inside that cell.
+ class FeatureDetectorUsingMaskTest : public cvtest::BaseTest
+ {
+ public:
+     // featureDetector must not be empty (enforced with CV_Assert).
+     FeatureDetectorUsingMaskTest(const Ptr<FeatureDetector>& featureDetector) :
+         featureDetector_(featureDetector)
+     {
+         CV_Assert(!featureDetector_.empty());
+     }
+ 
+ protected:
+ 
+     void run(int)
+     {
+         const int nStepX = 2;
+         const int nStepY = 2;
+ 
+         const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png";
+ 
+         Mat image = imread(imageFilename);
+         if(image.empty())
+         {
+             ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str());
+             ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
+             return;
+         }
+ 
+         Mat mask(image.size(), CV_8U);
+ 
+         // Size of one grid cell; integer division, so a remainder strip at the
+         // right/bottom edge of the image is never part of any cell.
+         const int stepX = image.size().width / nStepX;
+         const int stepY = image.size().height / nStepY;
+ 
+         vector<KeyPoint> keyPoints;
+         vector<Point2f> points;
+         for(int i=0; i<nStepX; ++i)
+             for(int j=0; j<nStepY; ++j)
+             {
+ 
+                 // Enable only the current cell in the mask.
+                 mask.setTo(0);
+                 Rect whiteArea(i * stepX, j * stepY, stepX, stepY);
+                 mask(whiteArea).setTo(255);
+ 
+                 featureDetector_->detect(image, keyPoints, mask);
+                 KeyPoint::convert(keyPoints, points);
+ 
+                 for(size_t k=0; k<points.size(); ++k)
+                 {
+                     if ( !whiteArea.contains(points[k]) )
+                     {
+                         // Newline-terminated like the other log message above, so
+                         // that it does not run into whatever is logged next.
+                         ts->printf(cvtest::TS::LOG, "The feature point is outside of the mask.\n");
+                         ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
+                         return;
+                     }
+                 }
+             }
+ 
+         ts->set_failed_test_info( cvtest::TS::OK );
+     }
+ 
+     Ptr<FeatureDetector> featureDetector_;
+ };
+ 
+ // Runs the mask test with the detector registered as "Feature2D.SIFT".
+ TEST(Features2d_SIFT_using_mask, regression)
+ {
+     Ptr<FeatureDetector> siftDetector = Algorithm::create<FeatureDetector>("Feature2D.SIFT");
+     FeatureDetectorUsingMaskTest test(siftDetector);
+     test.safe_run();
+ }
+ 
+ // Runs the mask test with the detector registered as "Feature2D.SURF".
+ TEST(DISABLED_Features2d_SURF_using_mask, regression)
+ {
+     Ptr<FeatureDetector> surfDetector = Algorithm::create<FeatureDetector>("Feature2D.SURF");
+     FeatureDetectorUsingMaskTest test(surfDetector);
+     test.safe_run();
+ }
diff --cc modules/ocl/include/opencv2/ocl.hpp

index 9b5e761,0000000..a59aae1

mode 100644,000000..100644
--- 1/modules/ocl/include/opencv2/ocl.hpp
--- /dev/null
+++ b/modules/ocl/include/opencv2/ocl.hpp
@@@ -1,1787 -1,0 +1,1795 @@@
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                           License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_OCL_HPP__
+ +#define __OPENCV_OCL_HPP__
+ +
+ +#include <memory>
+ +#include <vector>
+ +
+ +#include "opencv2/core.hpp"
+ +#include "opencv2/imgproc.hpp"
+ +#include "opencv2/objdetect.hpp"
+ +
+ +namespace cv
+ +{
+ +    namespace ocl
+ +    {
+ +        enum
+ +        {
+ +            CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
+ +            CVCL_DEVICE_TYPE_CPU         = (1 << 1),
+ +            CVCL_DEVICE_TYPE_GPU         = (1 << 2),
+ +            CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ +            //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
+ +            CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
+ +        };
+ +
+ +        enum DevMemRW
+ +        {
+ +            DEVICE_MEM_R_W = 0,
+ +            DEVICE_MEM_R_ONLY,
+ +            DEVICE_MEM_W_ONLY
+ +        };
+ +
+ +        enum DevMemType
+ +        {
+ +            DEVICE_MEM_DEFAULT = 0,
+ +            DEVICE_MEM_AHP,         //alloc host pointer
+ +            DEVICE_MEM_UHP,         //use host pointer
+ +            DEVICE_MEM_CHP,         //copy host pointer
+ +            DEVICE_MEM_PM           //persistent memory
+ +        };
+ +
+ +        //Get the global device memory and read/write type
+ +        //return 1 if unified memory system supported, otherwise return 0
+ +        CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type);
+ +
+ +        //Set the global device memory and read/write type,
+ +        //the newly generated oclMat will all use this type
+ +        //return -1 if the target type is unsupported, otherwise return 0
+ +        CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
+ +
+ +        //this class contains OpenCL runtime information; each Info relates to one OpenCL platform
+ +        class CV_EXPORTS Info
+ +        {
+ +        public:
+ +            struct Impl; // only forward-declared here; the definition is not part of this header
+ +            Impl *impl;
+ +
+ +            Info();
+ +            Info(const Info &m);
+ +            ~Info();
+ +            void release();
+ +            Info &operator = (const Info &m);
+ +            std::vector<String> DeviceName; // one name per device of the platform; setDevice() selects a device by its index in this vector
+ +            String PlatformName;
+ +        };
+ +        //////////////////////////////// Initialization & Info ////////////////////////
+ +        //this function may be obsoleted
+ +        //CV_EXPORTS cl_device_id getDevice();
+ +        //the function must be called before any other cv::ocl:: functions; it initializes the OpenCL runtime
+ +        //each Info relates to an OpenCL platform
+ +        //there is one or more devices in each platform, each one has a separate name
+ +        CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
+ +
+ +        //sets the device you want to use; optional, may be called after getDevice has been called
+ +        //devnum is the index of the selected device in the DeviceName vector of Info
+ +        CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
+ +
+ +        //The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue
+ +        //returns cl_context *
+ +        CV_EXPORTS void* getoclContext();
+ +        //returns cl_command_queue *
+ +        CV_EXPORTS void* getoclCommandQueue();
+ +
+ +        //explicit call clFinish. The global command queue will be used.
+ +        CV_EXPORTS void finish();
+ +
+ +        //this function enables the ocl module to use a customized cl_context and cl_command_queue
+ +        //getDevice also needs to be called before this function
+ +        CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
+ +
+ +        //returns true when global OpenCL context is initialized
+ +        CV_EXPORTS bool initialized();
+ +
+ +        //////////////////////////////// OpenCL context ////////////////////////
+ +        //This is a global singleton class used to represent an OpenCL context.
+ +        class CV_EXPORTS Context
+ +        {
+ +        protected:
+ +            Context(); // not publicly constructible; see getContext()
+ +            friend class std::auto_ptr<Context>;
+ +            friend bool initialized();
+ +        private:
+ +            static std::auto_ptr<Context> clCxt;
+ +            static int val;
+ +        public:
+ +            ~Context();
+ +            void release();
+ +            Info::Impl* impl;
+ +
+ +            static Context *getContext();
+ +            static void setContext(Info &oclinfo);
+ +
+ +            enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; // feature ids; presumably the ftype values accepted by supportsFeature() - TODO confirm
+ +            bool supportsFeature(int ftype);
+ +            size_t computeUnits();
+ +            size_t maxWorkGroupSize();
+ +            void* oclContext();
+ +            void* oclCommandQueue();
+ +        };
+ +
+ +        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ +        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ +                                                        const char **source, String kernelName,
+ +                                                        size_t globalThreads[3], size_t localThreads[3],
+ +                                                        std::vector< std::pair<size_t, const void *> > &args,
+ +                                                        int channels, int depth, const char *build_options,
+ +                                                        bool finish = true, bool measureKernelTime = false,
+ +                                                        bool cleanUp = true);
+ +
+ +        //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ +        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+ +                                                        const char **fileName, const int numFiles, String kernelName,
+ +                                                        size_t globalThreads[3], size_t localThreads[3],
+ +                                                        std::vector< std::pair<size_t, const void *> > &args,
+ +                                                        int channels, int depth, const char *build_options,
+ +                                                        bool finish = true, bool measureKernelTime = false,
+ +                                                        bool cleanUp = true);
+ +
+ +        //! Enable or disable caching of OpenCL program binaries on the local disk
+ +        // After a program (*.cl files in opencl/ folder) is built at runtime, the
+ +        // compiled OpenCL program can be cached automatically as a "path/*.clb"
+ +        // binary file, which will be reused when the OpenCV executable is started again.
+ +        //
+ +        // The caching mode is controlled by the following enum values.
+ +        // Notes
+ +        //   1. the feature is enabled by default when OpenCV is built in release mode.
+ +        //   2. the CACHE_DEBUG / CACHE_RELEASE flags are only effective with the MSVC compiler;
+ +        //      for GNU compilers, the function always treats the build as release mode (enabled by default).
+ +        enum
+ +        {
+ +            CACHE_NONE    = 0,        // do not cache OpenCL binary
+ +            CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
+ +            CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
+ +            CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache OpenCL binary
+ +            CACHE_UPDATE  = 0x1 << 2  // if a binary cache file with the same name is already on the disk, it will be updated.
+ +        };
+ +        CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
+ +
+ +        //! sets the path where the binary cache is saved
+ +        CV_EXPORTS void setBinpath(const char *path);
+ +
+ +        class CV_EXPORTS oclMatExpr;
+ +        //////////////////////////////// oclMat ////////////////////////////////
+ +        class CV_EXPORTS oclMat
+ +        {
+ +        public:
+ +            //! default constructor
+ +            oclMat();
+ +            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ +            oclMat(int rows, int cols, int type);
+ +            oclMat(Size size, int type);
+ +            //! constructs oclMatrix and fills it with the specified value _s.
+ +            oclMat(int rows, int cols, int type, const Scalar &s);
+ +            oclMat(Size size, int type, const Scalar &s);
+ +            //! copy constructor
+ +            oclMat(const oclMat &m);
+ +
+ +            //! constructor for oclMatrix headers pointing to user-allocated data
+ +            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +
+ +            //! creates a matrix header for a part of the bigger matrix
+ +            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ +            oclMat(const oclMat &m, const Rect &roi);
+ +
+ +            //! builds oclMat from Mat. Performs a blocking upload to the device.
+ +            explicit oclMat (const Mat &m);
+ +
+ +            //! destructor - calls release()
+ +            ~oclMat();
+ +
+ +            //! assignment operators
+ +            oclMat &operator = (const oclMat &m);
+ +            //! assignment operator. Performs a blocking upload to the device.
+ +            oclMat &operator = (const Mat &m);
+ +            oclMat &operator = (const oclMatExpr& expr);
+ +
+ +            //! performs a blocking upload of data to oclMat.
+ +            void upload(const cv::Mat &m);
+ +
+ +
+ +            //! downloads data from device to host memory. Blocking calls.
+ +            operator Mat() const;
+ +            void download(cv::Mat &m) const;
+ +
++            //! convert to _InputArray
++            operator _InputArray();
++
++            //! convert to _OutputArray
++            operator _OutputArray();
+ +
+ +            //! returns a new oclMatrix header for the specified row
+ +            oclMat row(int y) const;
+ +            //! returns a new oclMatrix header for the specified column
+ +            oclMat col(int x) const;
+ +            //! ... for the specified row span
+ +            oclMat rowRange(int startrow, int endrow) const;
+ +            oclMat rowRange(const Range &r) const;
+ +            //! ... for the specified column span
+ +            oclMat colRange(int startcol, int endcol) const;
+ +            oclMat colRange(const Range &r) const;
+ +
+ +            //! returns deep copy of the oclMatrix, i.e. the data is copied
+ +            oclMat clone() const;
+ +            //! copies the oclMatrix content to "m".
+ +            // It calls m.create(this->size(), this->type()).
+ +            // It supports any data type
+ +            void copyTo( oclMat &m ) const;
+ +            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            void copyTo( oclMat &m, const oclMat &mask ) const;
+ +            //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+ +
+ +            void assignTo( oclMat &m, int type = -1 ) const;
+ +
+ +            //! sets every oclMatrix element to s
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            oclMat& operator = (const Scalar &s);
+ +            //! sets some of the oclMatrix elements to s, according to the mask
+ +            //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4
+ +            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ +            //! creates alternative oclMatrix header for the same data, with different
+ +            // number of channels and/or different number of rows. see cvReshape.
+ +            oclMat reshape(int cn, int rows = 0) const;
+ +
+ +            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ +            // previous data is unreferenced if needed.
+ +            void create(int rows, int cols, int type);
+ +            void create(Size size, int type);
+ +
+ +            //! allocates new oclMatrix with specified device memory type.
+ +            void createEx(int rows, int cols, int type,
+ +                          DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
+ +            void createEx(Size size, int type, DevMemRW rw_type,
+ +                          DevMemType mem_type, void* hptr = 0);
+ +
+ +            //! decreases reference counter;
+ +            // deallocate the data when reference counter reaches 0.
+ +            void release();
+ +
+ +            //! swaps with other smart pointer
+ +            void swap(oclMat &mat);
+ +
+ +            //! locates oclMatrix header within a parent oclMatrix. See below
+ +            void locateROI( Size &wholeSize, Point &ofs ) const;
+ +            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ +            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ +            //! extracts a rectangular sub-oclMatrix
+ +            // (this is a generalized form of row, rowRange etc.)
+ +            oclMat operator()( Range rowRange, Range colRange ) const;
+ +            oclMat operator()( const Rect &roi ) const;
+ +
+ +            oclMat& operator+=( const oclMat& m );
+ +            oclMat& operator-=( const oclMat& m );
+ +            oclMat& operator*=( const oclMat& m );
+ +            oclMat& operator/=( const oclMat& m );
+ +
+ +            //! returns true if the oclMatrix data is continuous
+ +            // (i.e. when there are no gaps between successive rows).
+ +            // similar to CV_IS_MAT_CONT(cvmat->type)
+ +            bool isContinuous() const;
+ +            //! returns element size in bytes,
+ +            // similar to CV_ELEM_SIZE(cvMat->type)
+ +            size_t elemSize() const;
+ +            //! returns the size of element channel in bytes.
+ +            size_t elemSize1() const;
+ +            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ +            int type() const;
+ +            //! returns element type as used in ocl, e.g. 8UC3 returns 8UC4 because in ocl
+ +            //! a 3-channel element actually uses 4-channel space
+ +            int ocltype() const;
+ +            //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+ +            int depth() const;
+ +            //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
+ +            int channels() const;
+ +            //! returns number of channels as used in ocl: returns 4 for a 3-channel element,
+ +            //! because a 3-channel element actually uses 4-channel space
+ +            int oclchannels() const;
+ +            //! returns step/elemSize1()
+ +            size_t step1() const;
+ +            //! returns oclMatrix size:
+ +            // width == number of columns, height == number of rows
+ +            Size size() const;
+ +            //! returns true if oclMatrix data is NULL
+ +            bool empty() const;
+ +
+ +            //! returns pointer to y-th row
+ +            uchar* ptr(int y = 0);
+ +            const uchar *ptr(int y = 0) const;
+ +
+ +            //! template version of the above method
+ +            template<typename _Tp> _Tp *ptr(int y = 0);
+ +            template<typename _Tp> const _Tp *ptr(int y = 0) const;
+ +
+ +            //! matrix transposition
+ +            oclMat t() const;
+ +
+ +            /*! includes several bit-fields:
+ +              - the magic signature
+ +              - continuity flag
+ +              - depth
+ +              - number of channels
+ +              */
+ +            int flags;
+ +            //! the number of rows and columns
+ +            int rows, cols;
+ +            //! a distance between successive rows in bytes; includes the gap if any
+ +            size_t step;
+ +            //! pointer to the data(OCL memory object)
+ +            uchar *data;
+ +
+ +            //! pointer to the reference counter;
+ +            // when oclMatrix points to user-allocated data, the pointer is NULL
+ +            int *refcount;
+ +
+ +            //! helper fields used in locateROI and adjustROI
+ +            //datastart and dataend are not used in current version
+ +            uchar *datastart;
+ +            uchar *dataend;
+ +
+ +            //! OpenCL context associated with the oclMat object.
+ +            Context *clCxt;
+ +            //offset used to handle ROI, in bytes
+ +            int offset;
+ +            //wholerows and wholecols describe the whole matrix; datastart and dataend are no longer used
+ +            int wholerows;
+ +            int wholecols;
+ +        };
+ +
++        // convert InputArray/OutputArray to oclMat references
++        CV_EXPORTS oclMat& getOclMatRef(InputArray src);
++        CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
+ +
+ +        ///////////////////// mat split and merge /////////////////////////////////
+ +        //! Compose a multi-channel array from several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ +        CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+ +
+ +        //! Divides multi-channel array into several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ +        CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+ +
+ +        ////////////////////////////// Arithmetics ///////////////////////////////////
+ +        //#if defined DOUBLE_SUPPORT
+ +        //typedef double F;
+ +        //#else
+ +        //typedef float F;
+ +        //#endif
+ +        //    CV_EXPORTS void addWeighted(const oclMat& a,F  alpha, const oclMat& b,F beta,F gama, oclMat& c);
+ +        CV_EXPORTS void addWeighted(const oclMat &a, double  alpha, const oclMat &b, double beta, double gama, oclMat &c);
+ +        //! adds one matrix to another (c = a + b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! adds one matrix to another (c = a + b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ +        //! adds scalar to a matrix (c = a + s)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ +        //! subtracts one matrix from another (c = a - b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! subtracts one matrix from another (c = a - b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask);
+ +        //! subtracts scalar from a matrix (c = a - s)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat());
+ +        //! subtracts a matrix from a scalar (presumably c = s - a, given the argument order - TODO confirm)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat());
+ +        //! computes element-wise product of the two arrays (c = a * b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ +        //! multiplies a matrix by a number (dst = scalar * src)
+ +        // supports CV_32FC1 only
+ +        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
+ +        //! computes element-wise quotient of the two arrays (c = a / b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ +        //! computes element-wise quotient of a number and an array (presumably c = scale / b - TODO confirm)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c);
+ +
+ +        //! compares elements of two arrays (c = a <cmpop> b)
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop);
+ +
+ +        //! transposes the matrix
+ +        // supports  CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda)
+ +        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes element-wise absolute difference of two arrays (c = abs(a - b))
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c);
+ +        //! computes element-wise absolute difference of array and scalar (c = abs(a - s))
+ +        // supports all types except CV_8SC1,CV_8SC2,CV_8SC3 and CV_8SC4
+ +        CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c);
+ +
+ +        //! computes mean value and standard deviation of all or selected array elements
+ +        // supports all types except CV_32F,CV_64F
+ +        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+ +
+ +        //! computes norm of array
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports only CV_8UC1 type
+ +        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+ +
+ +        //! computes norm of the difference between two arrays
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports only CV_8UC1 type
+ +        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+ +
+ +        //! reverses the order of the rows, columns or both in a matrix
+ +        // supports all types
+ +        CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode);
+ +
+ +        //! computes sum of array elements
+ +        // disabled until fix crash
+ +        // support all types
+ +        CV_EXPORTS Scalar sum(const oclMat &m);
+ +        CV_EXPORTS Scalar absSum(const oclMat &m);
+ +        CV_EXPORTS Scalar sqrSum(const oclMat &m);
+ +
+ +        //! finds global minimum and maximum array elements and returns their values
+ +        // support all C1 types
+ +
+ +        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
+ +
+ +        //! finds global minimum and maximum array elements and returns their values with locations
+ +        // support all C1 types
+ +
+ +        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ +                                  const oclMat &mask = oclMat());
+ +
+ +        //! counts non-zero array elements
+ +        // support all types
+ +        CV_EXPORTS int countNonZero(const oclMat &src);
+ +
+ +        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+ +        // destination array will have the depth type as lut and the same channels number as source
+ +        //It supports 8UC1 8UC4 only
+ +        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
+ +
+ +        //! only 8UC1 and 256 bins are supported now
+ +        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ +        //! only 8UC1 and 256 bins are supported now
+ +        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+ +        
+ +        //! only 8UC1 is supported now
+ +        class CV_EXPORTS CLAHE
+ +        {
+ +        public:
+ +            virtual void apply(const oclMat &src, oclMat &dst) = 0;
+ +
+ +            virtual void setClipLimit(double clipLimit) = 0;
+ +            virtual double getClipLimit() const = 0;
+ +
+ +            virtual void setTilesGridSize(Size tileGridSize) = 0;
+ +            virtual Size getTilesGridSize() const = 0;
+ +
+ +            virtual void collectGarbage() = 0;
+ +
+ +            virtual ~CLAHE() {}
+ +        };
+ +        CV_EXPORTS Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+ +        
+ +        //! bilateralFilter
+ +        // supports 8UC1 8UC4
+ +        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
+ +        //! computes exponent of each matrix element (b = e**a)
+ +        // supports only CV_32FC1 type
+ +        CV_EXPORTS void exp(const oclMat &a, oclMat &b);
+ +
+ +        //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
+ +        // supports only CV_32FC1 type
+ +        CV_EXPORTS void log(const oclMat &a, oclMat &b);
+ +
+ +        //! computes magnitude of each (x(i), y(i)) vector
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ +        CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ +
+ +        CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude);
+ +
+ +        //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! the function raises every element of the input array to p
+ +        //! support only CV_32F CV_64F type
+ +        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+ +
+ +        //! converts Cartesian coordinates to polar
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! converts polar coordinates to Cartesian
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
+ +
+ +        //! performs per-element bit-wise inversion
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+ +        //! calculates per-element bit-wise disjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! calculates per-element bit-wise conjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! calculates per-element bit-wise "exclusive or" operation
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! Logical operators
+ +        CV_EXPORTS oclMat operator ~ (const oclMat &);
+ +        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+ +
+ +
+ +        //! Mathematics operators
+ +        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+ +
+ +        struct CV_EXPORTS ConvolveBuf
+ +        {
+ +            Size result_size;
+ +            Size block_size;
+ +            Size user_block_size;
+ +            Size dft_size;
+ +
+ +            oclMat image_spect, templ_spect, result_spect;
+ +            oclMat image_block, templ_block, result_data;
+ +
+ +            void create(Size image_size, Size templ_size);
+ +            static Size estimateBlockSize(Size result_size, Size templ_size);
+ +        };
+ +
+ +        //! computes convolution of two images, may use discrete Fourier transform
+ +        //! support only CV_32FC1 type
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
+ +
+ +        //! Performs a per-element multiplication of two Fourier spectrums.
+ +        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
+ +        //! support only CV_32FC2 type
+ +        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
+ +
+ +        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
+ +
+ +        //////////////////////////////// Filter Engine ////////////////////////////////
+ +
+ +        /*!
+ +          The Base Class for 1D or Row-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process 1D data.
+ +          In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseRowFilter_GPU
+ +        {
+ +        public:
+ +            BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseRowFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Column-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ +          Such filters are used for the "vertical" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseColumnFilter_GPU
+ +        {
+ +        public:
+ +            BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseColumnFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Non-Separable 2D Filters.
+ +
+ +          This is the base class for linear or non-linear 2D filters.
+ +          */
+ +        class CV_EXPORTS BaseFilter_GPU
+ +        {
+ +        public:
+ +            BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ +                : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ +            virtual ~BaseFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            Size ksize;
+ +            Point anchor;
+ +            int borderType;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Filter Engine.
+ +
+ +          The class can be used to apply an arbitrary filtering operation to an image.
+ +          It contains all the necessary intermediate buffers.
+ +          */
+ +        class CV_EXPORTS FilterEngine_GPU
+ +        {
+ +        public:
+ +            virtual ~FilterEngine_GPU() {}
+ +
+ +            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ +        };
+ +
+ +        //! returns the non-separable filter engine with the specified filter
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+ +
+ +        //! returns the primitive row filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns the primitive column filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+ +
+ +        //! returns the separable linear filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
+ +                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns the separable filter engine with the specified filters
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ +                const Ptr<BaseColumnFilter_GPU> &columnFilter);
+ +
+ +        //! returns the Gaussian filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns filter engine for the generalized Sobel operator
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
+ +
+ +        //! applies Laplacian operator to the image
+ +        // supports only ksize = 1 and ksize = 3; supported data types: 8UC1, 8UC4, 32FC1, 32FC4
+ +        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1);
+ +
+ +        //! returns 2D box filter
+ +        // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ +                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns box filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D filter with the specified kernel
+ +        // supports CV_8UC1 and CV_8UC4 types
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ +                Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns the non-separable linear filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using the normalized box filter
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
+ +        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ +                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D morphological filter
+ +        //! only MORPH_ERODE and MORPH_DILATE are supported
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        // kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ +                Point anchor = Point(-1, -1));
+ +
+ +        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int iterations = 1);
+ +
+ +        //! a synonym for normalized box filter
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ +                                int borderType = BORDER_CONSTANT) // NOTE(review): default differs from boxFilter's declared BORDER_DEFAULT -- confirm this is intentional
+ +        {
+ +            boxFilter(src, dst, -1, ksize, anchor, borderType); // forwards every argument to boxFilter; ddepth is fixed to -1
+ +        }
+ +
+ +        //! applies non-separable 2D linear filter to the image
+ +        //  Note, at the moment this function only works when anchor point is in the kernel center
+ +        //  and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result
+ +        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
+ +                                 Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! applies separable 2D linear filter to the image
+ +        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ +                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies generalized Sobel operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies the vertical or horizontal Scharr operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using Gaussian filter.
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! erodes the image (applies the local minimum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! dilates the image (applies the local maximum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! applies an advanced morphological operation to the image
+ +        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        ////////////////////////////// Image processing //////////////////////////////
+ +        //! Does mean shift filtering on GPU.
+ +        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ +                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift procedure on GPU.
+ +        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ +                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift segmentation with elimination of small regions.
+ +        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ +                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! applies fixed threshold to the image.
+ +        // supports CV_8UC1 and CV_32FC1 data type
+ +        // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
+ +        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+ +
+ +        //! resizes the image
+ +        // Supports INTER_NEAREST, INTER_LINEAR
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+ +
+ +        //! Applies a generic geometrical transformation to an image.
+ +
+ +        // Supports INTER_NEAREST, INTER_LINEAR.
+ +
+ +        // Map1 supports CV_16SC2, CV_32FC2  types.
+ +
+ +        // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+ +
+ +        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+ +
+ +        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+ +        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
+ +        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+ +
+ +        //! Smoothes image using median filter
+ +        // The source is a 1- or 4-channel image. When m is 3 or 5, the image depth should be CV_8U or CV_32F.
+ +        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+ +
+ +        //! warps the image using affine transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! warps the image using perspective transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! computes the integral image and integral for the squared image
+ +        // sum will have CV_32S type, sqsum - CV_32F type
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+ +        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ +            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ +            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ +
+ +        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +
+ +#if 0
+ +        class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
+ +        {
+ +        public:
+ +            OclCascadeClassifier() {};
+ +            ~OclCascadeClassifier() {};
+ +
+ +            CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
+ +                                        int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ +        };
+ +#endif
+ +
+ +#if 0
+ +        class CV_EXPORTS OclCascadeClassifierBuf : public  cv::CascadeClassifier
+ +        {
+ +        public:
+ +            OclCascadeClassifierBuf() :
+ +                m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
+ +
+ +            ~OclCascadeClassifierBuf() { release(); }
+ +
+ +            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
+ +                                  double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
+ +                                  Size minSize = Size(), Size maxSize = Size());
+ +            void release();
+ +
+ +        private:
+ +            void Init(const int rows, const int cols, double scaleFactor, int flags,
+ +                      const int outputsz, const size_t localThreads[],
+ +                      Size minSize, Size maxSize);
+ +            void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
+ +            void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
+ +                                         const double scaleFactor, const size_t localThreads[],
+ +                                         Size minSize, Size maxSize);
+ +            void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
+ +
+ +            int m_rows;
+ +            int m_cols;
+ +            int m_flags;
+ +            int m_loopcount;
+ +            int m_nodenum;
+ +            bool findBiggestObject;
+ +            bool initialized;
+ +            double m_scaleFactor;
+ +            Size m_minSize;
+ +            Size m_maxSize;
+ +            std::vector<Size> sizev;
+ +            std::vector<float> scalev;
+ +            oclMat gimg1, gsum, gsqsum;
+ +            void * buffers;
+ +        };
+ +#endif
+ +
+ +        /////////////////////////////// Pyramid /////////////////////////////////////
+ +        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+ +
+ +        //! upsamples the source image and then smoothes it
+ +        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+ +
+ +        //! performs linear blending of two images
+ +        //! to avoid accuracy errors sum of weights shouldn't be very close to zero
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+ +
+ +        //! computes vertical sum, supports only CV_32FC1 images
+ +        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+ +
+ +        ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ +        struct CV_EXPORTS MatchTemplateBuf
+ +        {
+ +            Size user_block_size;
+ +            oclMat imagef, templf;
+ +            std::vector<oclMat> images;
+ +            std::vector<oclMat> image_sums;
+ +            std::vector<oclMat> image_sqsums;
+ +        };
+ +
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+ +
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+ +
+ +
+ +
+ +        ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+ +        struct CV_EXPORTS CannyBuf;
+ +
+ +        //! compute edges of the input image using Canny operator
+ +        // Support CV_8UC1 only
+ +        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +
+ +        struct CV_EXPORTS CannyBuf
+ +        {
+ +            CannyBuf() : counter(NULL) {} // default construction only initializes counter to NULL
+ +            ~CannyBuf()
+ +            {
+ +                release(); // cleanup is delegated to release(); its definition is not in this section
+ +            }
+ +            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
+ +            {
+ +                create(image_size, apperture_size); // counter is set to NULL first, then create() runs
+ +            }
+ +            CannyBuf(const oclMat &dx_, const oclMat &dy_);
+ +            void create(const Size &image_size, int apperture_size = 3);
+ +            void release();
+ +
+ +            oclMat dx, dy;
+ +            oclMat dx_buf, dy_buf;
+ +            oclMat magBuf, mapBuf;
+ +            oclMat trackBuf1, trackBuf2;
+ +            void *counter; // NOTE(review): raw pointer with a user-declared destructor, but no copy constructor/assignment is declared here -- confirm copying a CannyBuf is safe
+ +            Ptr<FilterEngine_GPU> filterDX, filterDY;
+ +        };
+ +
+ +        ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ +        //! HoughCircles
+ +        struct HoughCirclesBuf
+ +        {
+ +            oclMat edges;
+ +            oclMat accum;
+ +            oclMat srcPoints;
+ +            oclMat centers;
+ +            CannyBuf cannyBuf;
+ +        };
+ +
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
+ +
+ +
+ +        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ +        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ +        //! Param dft_size is the size of DFT transform.
+ +        //!
+ +        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ +        // supports src types CV_32FC1 and CV_32FC2
+ +        // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ +        // dft_size is the size of original input, which is used for transformation from complex to real.
+ +        // dft_size must be powers of 2, 3 and 5
+ +        // real to complex dft requires at least v1.8 clAmdFft
+ +        // real to complex dft output is not the same with cpu version
+ +        // real to complex and complex to real does not support DFT_ROWS
+ +        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
+ +
+ +        //! implements generalized matrix product algorithm GEMM from BLAS
+ +        // The functionality requires clAmdBlas library
+ +        // only support type CV_32FC1
+ +        // flag GEMM_3_T is not supported
+ +        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ +                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
+ +
+ +        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+ +
+ +        struct CV_EXPORTS HOGDescriptor
+ +
+ +        {
+ +
+ +            enum { DEFAULT_WIN_SIGMA = -1 };
+ +
+ +            enum { DEFAULT_NLEVELS = 64 };
+ +
+ +            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+ +
+ +
+ +
+ +            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+ +
+ +                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+ +
+ +                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+ +
+ +                          double threshold_L2hys = 0.2, bool gamma_correction = true,
+ +
+ +                          int nlevels = DEFAULT_NLEVELS);
+ +
+ +
+ +
+ +            size_t getDescriptorSize() const;
+ +
+ +            size_t getBlockHistogramSize() const;
+ +
+ +
+ +
+ +            void setSVMDetector(const std::vector<float> &detector);
+ +
+ +
+ +
+ +            static std::vector<float> getDefaultPeopleDetector();
+ +
+ +            static std::vector<float> getPeopleDetector48x96();
+ +
+ +            static std::vector<float> getPeopleDetector64x128();
+ +
+ +
+ +
+ +            void detect(const oclMat &img, std::vector<Point> &found_locations,
+ +
+ +                        double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                        Size padding = Size());
+ +
+ +
+ +
+ +            void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+ +
+ +                                  double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                                  Size padding = Size(), double scale0 = 1.05,
+ +
+ +                                  int group_threshold = 2);
+ +
+ +
+ +
+ +            void getDescriptors(const oclMat &img, Size win_stride,
+ +
+ +                                oclMat &descriptors,
+ +
+ +                                int descr_format = DESCR_FORMAT_COL_BY_COL);
+ +
+ +
+ +
+ +            Size win_size;
+ +
+ +            Size block_size;
+ +
+ +            Size block_stride;
+ +
+ +            Size cell_size;
+ +
+ +            int nbins;
+ +
+ +            double win_sigma;
+ +
+ +            double threshold_L2hys;
+ +
+ +            bool gamma_correction;
+ +
+ +            int nlevels;
+ +
+ +
+ +
+ +        protected:
+ +
+ +            // initialize buffers; only need to do once in case of multiscale detection
+ +
+ +            void init_buffer(const oclMat &img, Size win_stride);
+ +
+ +
+ +
+ +            void computeBlockHistograms(const oclMat &img);
+ +
+ +            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+ +
+ +
+ +
+ +            double getWinSigma() const;
+ +
+ +            bool checkDetectorSize() const;
+ +
+ +
+ +
+ +            static int numPartsWithin(int size, int part_size, int stride);
+ +
+ +            static Size numPartsWithin(Size size, Size part_size, Size stride);
+ +
+ +
+ +
+ +            // Coefficients of the separating plane
+ +
+ +            float free_coef;
+ +
+ +            oclMat detector;
+ +
+ +
+ +
+ +            // Results of the last classification step
+ +
+ +            oclMat labels;
+ +
+ +            Mat labels_host;
+ +
+ +
+ +
+ +            // Results of the last histogram evaluation step
+ +
+ +            oclMat block_hists;
+ +
+ +
+ +
+ +            // Gradients computation results
+ +
+ +            oclMat grad, qangle;
+ +
+ +
+ +
+ +            // scaled image
+ +
+ +            oclMat image_scale;
+ +
+ +
+ +
+ +            // effect size of input image (might be different from original size after scaling)
+ +
+ +            Size effect_size;
+ +
+ +        };
+ +
+ +
+ +        ////////////////////////feature2d_ocl/////////////////
+ +        /****************************************************************************************\
+ +        *                                      Distance                                          *
+ +        \****************************************************************************************/
+ +        template<typename T>
+ +        struct CV_EXPORTS Accumulator // primary template: Type is the element type T itself
+ +        {
+ +            typedef T Type;
+ +        };
+ +        template<> struct Accumulator<unsigned char> // specialization: unsigned char maps to float
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<unsigned short> // specialization: unsigned short maps to float
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<char> // specialization: char maps to float
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<short> // specialization: short maps to float
+ +        {
+ +            typedef float Type;
+ +        };
+ +
+ +        /*
+ +         * Manhattan distance (city block distance) functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L1
+ +        {
+ +            enum { normType = NORM_L1 }; // compile-time constant naming the norm
+ +            typedef T ValueType; // element type of the two input arrays
+ +            typedef typename Accumulator<T>::Type ResultType; // result type is selected through Accumulator<T>
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return normL1<ValueType, ResultType>(a, b, size); // passes a, b and size straight to normL1
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Euclidean distance functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L2
+ +        {
+ +            enum { normType = NORM_L2 }; // compile-time constant naming the norm
+ +            typedef T ValueType; // element type of the two input arrays
+ +            typedef typename Accumulator<T>::Type ResultType; // result type is selected through Accumulator<T>
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size)); // square root is taken in double, then cast back to ResultType
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ +         * bit count of A XOR'ed with B
+ +         */
+ +        struct CV_EXPORTS Hamming
+ +        {
+ +            enum { normType = NORM_HAMMING }; // compile-time constant naming the norm
+ +            typedef unsigned char ValueType; // inputs are byte arrays
+ +            typedef int ResultType; // the distance is returned as an int
+ +
+ +            /** Counts the bits set in a ^ b.
+ +             */
+ +            ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ +            {
+ +                return normHamming(a, b, size); // passes a, b and size straight to normHamming
+ +            }
+ +        };
+ +
+ +        ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+ +
+ +        class CV_EXPORTS BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            enum DistType {L1Dist = 0, L2Dist, HammingDist};
+ +            explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+ +
+ +            // Add descriptors to train descriptor collection
+ +            void add(const std::vector<oclMat> &descCollection);
+ +
+ +            // Get train descriptors collection
+ +            const std::vector<oclMat> &getTrainDescriptors() const;
+ +
+ +            // Clear train descriptors collection
+ +            void clear();
+ +
+ +            // Return true if there are no train descriptors in the collection
+ +            bool empty() const;
+ +
+ +            // Return true if the matcher supports mask in match methods
+ +            bool isMaskSupported() const;
+ +
+ +            // Find one best match for each query descriptor
+ +            void matchSingle(const oclMat &query, const oclMat &train,
+ +                             oclMat &trainIdx, oclMat &distance,
+ +                             const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx and distance and convert it to CPU vector with DMatch
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ +            // Convert trainIdx and distance to vector with DMatch
+ +            static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
+ +            // Find one best match for each query descriptor
+ +            void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+ +
+ +            // Make gpu collection of trains and masks in suitable format for matchCollection function
+ +            void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Find one best match from train collection for each query descriptor
+ +            void matchCollection(const oclMat &query, const oclMat &trainCollection,
+ +                                 oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ +                                 const oclMat &masks = oclMat());
+ +
+ +            // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ +            // Convert trainIdx, imgIdx and distance to vector with DMatch
+ +            static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
+ +            // Find one best match from train collection for each query descriptor.
+ +            void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Find k best matches for each query descriptor (in increasing order of distances)
+ +            void knnMatchSingle(const oclMat &query, const oclMat &train,
+ +                                oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
+ +                                const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx and distance and convert it to vector with DMatch
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx and distance to vector with DMatch
+ +            static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
+ +                                        std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find k best matches for each query descriptor (in increasing order of distances).
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            void knnMatch(const oclMat &query, const oclMat &train,
+ +                          std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
+ +                          bool compactResult = false);
+ +
+ +            // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+ +            void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+ +                                     oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ +                                     const oclMat &maskCollection = oclMat());
+ +
+ +            // Download trainIdx and distance and convert it to vector with DMatch
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+ +                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx and distance to vector with DMatch
+ +            static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find k best matches  for each query descriptor (in increasing order of distances).
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
+ +                          const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
+ +            // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+ +            // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
+ +            // because it didn't have enough memory.
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+ +            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
+ +            // Matches doesn't sorted.
+ +            void radiusMatchSingle(const oclMat &query, const oclMat &train,
+ +                                   oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ +                                   const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+ +            // matches will be sorted in increasing order of distances.
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx, nMatches and distance to vector with DMatch.
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance
+ +            // in increasing order of distances).
+ +            void radiusMatch(const oclMat &query, const oclMat &train,
+ +                             std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ +                             const oclMat &mask = oclMat(), bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+ +            // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
+ +            // Matches doesn't sorted.
+ +            void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ +                                       const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+ +            // matches will be sorted in increasing order of distances.
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx, nMatches and distance to vector with DMatch.
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find best matches from train collection for each query descriptor which have distance less than
+ +            // maxDistance (in increasing order of distances).
+ +            void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ +                             const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
+ +            DistType distType;
+ +
+ +        private:
+ +            std::vector<oclMat> trainDescCollection;
+ +        };
+ +
+ +        template <class Distance>
+ +        class CV_EXPORTS BruteForceMatcher_OCL; // declared only here; the specializations below cover L1<T>, L2<T> and Hamming
+ +
+ +        template <typename T>
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base // L1<T> specialization: constructs the base with L1Dist
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
+ +            explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} // the functor argument is ignored; it only selects this overload
+ +        };
+ +        template <typename T>
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base // L2<T> specialization: constructs the base with L2Dist
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
+ +            explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} // the functor argument is ignored; it only selects this overload
+ +        };
+ +        template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base // Hamming specialization: constructs the base with HammingDist
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
+ +            explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} // the functor argument is ignored; it only selects this overload
+ +        };
+ +
+ +        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base // matcher whose distance type is chosen from a runtime norm code
+ +        {
+ +        public:
+ +            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} // NORM_L1 -> L1Dist, NORM_L2 -> L2Dist, every other value -> HammingDist (no validation of norm)
+ +        };
+ +
+ +        class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
+ +        {
+ +        public:
+ +            explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ +                int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+ +
+ +            //! returns a single-row matrix of CV_32FC2 type
+ +            void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ +            //! downloads points of type Point2f into a vector; the vector's previous content will be erased
+ +            void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
+ +
+ +            int maxCorners;
+ +            double qualityLevel;
+ +            double minDistance;
+ +
+ +            int blockSize;
+ +            bool useHarrisDetector;
+ +            double harrisK;
+ +            void releaseMemory() // releases all five private oclMat buffers declared below
+ +            {
+ +                Dx_.release();
+ +                Dy_.release();
+ +                eig_.release();
+ +                minMaxbuf_.release();
+ +                tmpCorners_.release();
+ +            }
+ +        private:
+ +            oclMat Dx_;
+ +            oclMat Dy_;
+ +            oclMat eig_;
+ +            oclMat minMaxbuf_;
+ +            oclMat tmpCorners_;
+ +        };
+ +
+ +        inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ +            int blockSize_, bool useHarrisDetector_, double harrisK_)
+ +        { // copies each argument into the public field of the same name; the private oclMat buffers are not touched here
+ +            maxCorners = maxCorners_;
+ +            qualityLevel = qualityLevel_;
+ +            minDistance = minDistance_;
+ +            blockSize = blockSize_;
+ +            useHarrisDetector = useHarrisDetector_;
+ +            harrisK = harrisK_;
+ +        }
+ +
+ +        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ +
+ +        class CV_EXPORTS PyrLKOpticalFlow
+ +        {
+ +        public:
+ +            PyrLKOpticalFlow() // assigns the default value of every public parameter plus isDeviceArch11_; buffers are not touched
+ +            {
+ +                winSize = Size(21, 21);
+ +                maxLevel = 3;
+ +                iters = 30;
+ +                derivLambda = 0.5;
+ +                useInitialFlow = false;
+ +                minEigThreshold = 1e-4f;
+ +                getMinEigenVals = false;
+ +                isDeviceArch11_ = false;
+ +            }
+ +
+ +            void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
+ +                        oclMat &status, oclMat *err = 0); // err is optional: it defaults to a null pointer
+ +
+ +            void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); // err is optional: it defaults to a null pointer
+ +
+ +            Size winSize;
+ +            int maxLevel;
+ +            int iters;
+ +            double derivLambda;
+ +            bool useInitialFlow;
+ +            float minEigThreshold;
+ +            bool getMinEigenVals;
+ +
+ +            void releaseMemory() // NOTE(review): uPyr_ and vPyr_ are not released here - confirm whether that is intentional
+ +            {
+ +                dx_calcBuf_.release();
+ +                dy_calcBuf_.release();
+ +
+ +                prevPyr_.clear();
+ +                nextPyr_.clear();
+ +
+ +                dx_buf_.release();
+ +                dy_buf_.release();
+ +            }
+ +
+ +        private:
+ +            void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+ +
+ +            void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+ +
+ +            oclMat dx_calcBuf_;
+ +            oclMat dy_calcBuf_;
+ +
+ +            std::vector<oclMat> prevPyr_;
+ +            std::vector<oclMat> nextPyr_;
+ +
+ +            oclMat dx_buf_;
+ +            oclMat dy_buf_;
+ +
+ +            oclMat uPyr_[2];
+ +            oclMat vPyr_[2];
+ +
+ +            bool isDeviceArch11_;
+ +        };
+ +        //////////////// build warping maps ////////////////////
+ +        //! builds plane warping maps (the only variant here that also takes a T matrix)
+ +        CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds cylindrical warping maps
+ +        CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds spherical warping maps
+ +        CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds affine warping maps
+ +        CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        //! builds perspective warping maps
+ +        CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ +        //! Interpolates frames (images) using the provided optical flow (displacement field).
+ +        //! frame0   - frame 0 (32-bit floating point image, single channel)
+ +        //! frame1   - frame 1 (the same type and size)
+ +        //! fu       - forward horizontal displacement
+ +        //! fv       - forward vertical displacement
+ +        //! bu       - backward horizontal displacement
+ +        //! bv       - backward vertical displacement
+ +        //! pos      - new frame position
+ +        //! newFrame - new frame
+ +        //! buf      - temporary buffer; it will have width x 6*height size and CV_32FC1 type, and will contain 6 oclMat:
+ +        //!            occlusion masks            0, occlusion masks            1,
+ +        //!            interpolated forward flow  0, interpolated forward flow  1,
+ +        //!            interpolated backward flow 0, interpolated backward flow 1
+ +        //!
+ +        CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ +                                          const oclMat &fu, const oclMat &fv,
+ +                                          const oclMat &bu, const oclMat &bv,
+ +                                          float pos, oclMat &newFrame, oclMat &buf);
+ +
+ +        //! computes the moments of a rasterized shape or of a vector of points
+ +        CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);
+ +
+ +        class CV_EXPORTS StereoBM_OCL
+ +        {
+ +        public:
+ +            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+ +
+ +            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+ +
+ +            //! the default constructor
+ +            StereoBM_OCL();
+ +            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+ +            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+ +
+ +            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair.
+ +            //! Output disparity has CV_8U type.
+ +            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+ +
+ +            //! Heuristic that tries to estimate
+ +            //! whether the current GPU will be faster than the CPU for this algorithm.
+ +            //! It queries the currently active device.
+ +            static bool checkIfGpuCallReasonable();
+ +
+ +            int preset;
+ +            int ndisp;
+ +            int winSize;
+ +
+ +            // If avergeTexThreshold == 0, post-processing is disabled.
+ +            // If avergeTexThreshold != 0, the disparity is set to 0 at each point (x,y) where, for the left image,
+ +            // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
+ +            // i.e. the input left image is low-textured there.
+ +            float avergeTexThreshold;
+ +        private:
+ +            oclMat minSSD, leBuf, riBuf;
+ +        };
+ +
+ +        class CV_EXPORTS StereoBeliefPropagation
+ +        {
+ +        public:
+ +            enum { DEFAULT_NDISP  = 64 };
+ +            enum { DEFAULT_ITERS  = 5  };
+ +            enum { DEFAULT_LEVELS = 5  };
+ +            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); // ndisp, iters and levels are output references
+ +            explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
+ +                                             int iters  = DEFAULT_ITERS,
+ +                                             int levels = DEFAULT_LEVELS,
+ +                                             int msg_type = CV_16S); // note: this overload defaults msg_type to CV_16S
+ +            StereoBeliefPropagation(int ndisp, int iters, int levels,
+ +                                    float max_data_term, float data_weight,
+ +                                    float max_disc_term, float disc_single_jump,
+ +                                    int msg_type = CV_32F); // note: this overload defaults msg_type to CV_32F instead
+ +            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ +            void operator()(const oclMat &data, oclMat &disparity); // overload taking a single data matrix instead of a left/right pair
+ +            int ndisp;
+ +            int iters;
+ +            int levels;
+ +            float max_data_term;
+ +            float data_weight;
+ +            float max_disc_term;
+ +            float disc_single_jump;
+ +            int msg_type;
+ +        private:
+ +            oclMat u, d, l, r, u2, d2, l2, r2;
+ +            std::vector<oclMat> datas;
+ +            oclMat out;
+ +        };
+ +
+ +        class CV_EXPORTS StereoConstantSpaceBP
+ +        {
+ +        public:
+ +            enum { DEFAULT_NDISP    = 128 };
+ +            enum { DEFAULT_ITERS    = 8   };
+ +            enum { DEFAULT_LEVELS   = 4   };
+ +            enum { DEFAULT_NR_PLANE = 4   };
+ +            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); // ndisp, iters, levels and nr_plane are output references
+ +            explicit StereoConstantSpaceBP(
+ +                int ndisp    = DEFAULT_NDISP,
+ +                int iters    = DEFAULT_ITERS,
+ +                int levels   = DEFAULT_LEVELS,
+ +                int nr_plane = DEFAULT_NR_PLANE,
+ +                int msg_type = CV_32F);
+ +            StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
+ +                float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
+ +                int min_disp_th = 0,
+ +                int msg_type = CV_32F);
+ +            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ +            int ndisp;
+ +            int iters;
+ +            int levels;
+ +            int nr_plane;
+ +            float max_data_term;
+ +            float data_weight;
+ +            float max_disc_term;
+ +            float disc_single_jump;
+ +            int min_disp_th;
+ +            int msg_type;
+ +            bool use_local_init_data_cost; // not a parameter of either constructor declared above
+ +        private:
+ +            oclMat u[2], d[2], l[2], r[2];
+ +            oclMat disp_selected_pyr[2];
+ +            oclMat data_cost;
+ +            oclMat data_cost_selected;
+ +            oclMat temp;
+ +            oclMat out;
+ +        };
+ +
+ +        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+ +        //
+ +        // see references:
+ +        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+ +        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+ +        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
+ +        {
+ +        public:
+ +            OpticalFlowDual_TVL1_OCL();
+ +
+ +            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
+ +
+ +            void collectGarbage();
+ +
+ +            /**
+ +            * Time step of the numerical scheme.
+ +            */
+ +            double tau;
+ +
+ +            /**
+ +            * Weight parameter for the data term, attachment parameter.
+ +            * This is the most relevant parameter, which determines the smoothness of the output.
+ +            * The smaller this parameter is, the smoother the solutions we obtain.
+ +            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+ +            */
+ +            double lambda;
+ +
+ +            /**
+ +            * Weight parameter for (u - v)^2, tightness parameter.
+ +            * It serves as a link between the attachment and the regularization terms.
+ +            * In theory, it should have a small value in order to maintain both parts in correspondence.
+ +            * The method is stable for a large range of values of this parameter.
+ +            */
+ +            double theta;
+ +
+ +            /**
+ +            * Number of scales used to create the pyramid of images.
+ +            */
+ +            int nscales;
+ +
+ +            /**
+ +            * Number of warpings per scale.
+ +            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+ +            * This is a parameter that assures the stability of the method.
+ +            * It also affects the running time, so it is a compromise between speed and accuracy.
+ +            */
+ +            int warps;
+ +
+ +            /**
+ +            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+ +            * A small value will yield more accurate solutions at the expense of a slower convergence.
+ +            */
+ +            double epsilon;
+ +
+ +            /**
+ +            * Iteration count used as a stopping criterion in the numerical scheme.
+ +            */
+ +            int iterations;
+ +
+ +            bool useInitialFlow; // NOTE(review): undocumented here - presumably makes operator() start from the passed-in flowx/flowy; confirm
+ +
+ +        private:
+ +            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
+ +
+ +            std::vector<oclMat> I0s;
+ +            std::vector<oclMat> I1s;
+ +            std::vector<oclMat> u1s;
+ +            std::vector<oclMat> u2s;
+ +
+ +            oclMat I1x_buf;
+ +            oclMat I1y_buf;
+ +
+ +            oclMat I1w_buf;
+ +            oclMat I1wx_buf;
+ +            oclMat I1wy_buf;
+ +
+ +            oclMat grad_buf;
+ +            oclMat rho_c_buf;
+ +
+ +            oclMat p11_buf;
+ +            oclMat p12_buf;
+ +            oclMat p21_buf;
+ +            oclMat p22_buf;
+ +
+ +            oclMat diff_buf;
+ +            oclMat norm_buf;
+ +        };
+ +    }
+ +}
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( push)
+ +#  pragma warning( disable: 4267)
+ +#endif
+ +#include "opencv2/ocl/matrix_operations.hpp"
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( pop)
+ +#endif
+ +
+ +#endif /* __OPENCV_OCL_HPP__ */
diff --cc modules/ocl/include/opencv2/ocl/private/util.hpp

index 4c9980a,634f2f2..93f7461
--- 1/modules/ocl/include/opencv2/ocl/private/util.hpp
--- 2/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@@ -109,11 -108,11 +109,11 @@@ namespace c
               DISABLE
           };
   
- -        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ +        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
                                     size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
- -        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+ +        void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
                                     size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
--                                  int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
++                                  int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
           // bind oclMat to OpenCL image textures
           // note:
           //   1. there is no memory management. User need to explicitly release the resource
diff --cc modules/ocl/perf/main.cpp
Simple merge
diff --cc modules/ocl/perf/perf_calib3d.cpp

index 231f065,f998ddf..428f00e
--- 1/modules/ocl/test/test_columnsum.cpp
--- 2/modules/ocl/perf/perf_calib3d.cpp
+++ b/modules/ocl/perf/perf_calib3d.cpp
@@@ -45,50 -45,57 +45,57 @@@
   //M*/
   
   #include "precomp.hpp"
- #include <iomanip>
+ ///////////// StereoMatchBM ////////////////////////
+ PERFTEST(StereoMatchBM)
+ {
+       Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
+       Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
+       Mat disp,dst;
+       ocl::oclMat d_left, d_right,d_disp;
+       int n_disp= 128;
+       int winSize =19;
   
- #ifdef HAVE_OPENCL
+       SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
   
- PARAM_TEST_CASE(ColumnSum, cv::Size)
- {
-     cv::Size size;
-     cv::Mat src;
- -      StereoBM bm(0, n_disp, winSize);
- -      bm(left_image, right_image, dst);
++      Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
++      bm->compute(left_image, right_image, dst);
   
-     virtual void SetUp()
-     {
-         size = GET_PARAM(0);
-     }
- };
+       CPU_ON;
- -      bm(left_image, right_image, dst);
++      bm->compute(left_image, right_image, dst);
+       CPU_OFF;
   
- TEST_P(ColumnSum, Accuracy)
- {
-     cv::Mat src = randomMat(size, CV_32FC1);
-     cv::ocl::oclMat d_dst;
-     cv::ocl::oclMat d_src(src);
- 
-     cv::ocl::columnSum(d_src, d_dst);
- 
-     cv::Mat dst(d_dst);
- 
-     for (int j = 0; j < src.cols; ++j)
-     {
-         float gold = src.at<float>(0, j);
-         float res = dst.at<float>(0, j);
-         ASSERT_NEAR(res, gold, 1e-5);
-     }
- 
-     for (int i = 1; i < src.rows; ++i)
-     {
-         for (int j = 0; j < src.cols; ++j)
-         {
-             float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
-             float res = dst.at<float>(i, j);
-             ASSERT_NEAR(res, gold, 1e-5);
-         }
-     }
+       d_left.upload(left_image);
+       d_right.upload(right_image);
+ 
+       ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
+ 
+       WARMUP_ON;
+       d_bm(d_left, d_right, d_disp);
+       WARMUP_OFF;
+ 
+     cv::Mat ocl_mat;
+     d_disp.download(ocl_mat);
+     ocl_mat.convertTo(ocl_mat, dst.type());
+ 
+       GPU_ON;
+       d_bm(d_left, d_right, d_disp);
+       GPU_OFF;
+ 
+       GPU_FULL_ON;
+       d_left.upload(left_image);
+       d_right.upload(right_image);
+       d_bm(d_left, d_right, d_disp);
+       d_disp.download(disp);
+       GPU_FULL_OFF;
+     
+     TestSystem::instance().setAccurate(-1, 0.);
   }
   
- INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES);
   
   
- #endif
+ 
+ 
+ 
+ 
+ 
+       
diff --cc modules/ocl/perf/perf_imgproc.cpp
Simple merge
diff --cc modules/ocl/perf/precomp.cpp

index 65e2d51,9fc6342..9601cda
--- 1/modules/ocl/perf/precomp.cpp
--- 2/modules/ocl/perf/precomp.cpp
+++ b/modules/ocl/perf/precomp.cpp
@@@ -331,24 -331,10 +331,10 @@@ void TestSystem::printMetrics(int is_ac
       cout << setiosflags(ios_base::left);
       stringstream stream;
   
- #if 0
-     if(is_accurate == 1)
-             stream << "Pass";
-     else if(is_accurate_ == 0)
-             stream << "Fail";
-     else if(is_accurate == -1)
-         stream << " ";
-     else
-     {
-         std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
-         exit(-1);
-     }
- #endif
- 
       std::stringstream &cur_subtest_description = getCurSubtestDescription();
- -   
+ +
   #if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
- -    
+ +
       WORD color;
       const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
       // Gets the current text color.
diff --cc modules/ocl/src/hog.cpp

index b2ebb23,3533cce..ff8f091
--- 1/modules/ocl/src/hog.cpp
--- 2/modules/ocl/src/hog.cpp
+++ b/modules/ocl/src/hog.cpp
@@@ -44,17 -44,111 +44,111 @@@
   //M*/
   
   #include "precomp.hpp"
+ +
   using namespace cv;
   using namespace cv::ocl;
- -using namespace std;
   
- 
   #define CELL_WIDTH 8
   #define CELL_HEIGHT 8
   #define CELLS_PER_BLOCK_X 2
   #define CELLS_PER_BLOCK_Y 2
   #define NTHREADS 256
   
- -static const float gaussian_interp_lut[] = 
+ static oclMat gauss_w_lut;
+ static bool hog_device_cpu;
+ /* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */
- -    0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f, 
- -    0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f, 
- -    0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f, 
- -    0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f, 
- -    0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, 
- -    0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, 
- -    0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, 
- -    0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, 
- -    0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 
- -    0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 
- -    0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f, 
- -    0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f, 
- -    0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, 
- -    0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, 
- -    0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, 
- -    0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, 
- -    0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 
- -    0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 
- -    0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f, 
- -    0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f, 
- -    0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, 
- -    0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f, 
- -    0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f, 
- -    0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f, 
- -    0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 
- -    0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 
- -    0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f, 
- -    0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f, 
- -    0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, 
- -    0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, 
- -    0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, 
- -    0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, 
- -    0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 
- -    0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 
- -    0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f, 
- -    0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f, 
- -    0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, 
- -    0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, 
- -    0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, 
- -    0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, 
- -    0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 
- -    0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 
- -    0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f, 
++static const float gaussian_interp_lut[] =
+ {
+     /* gaussian lut */
- -    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, 
- -    0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, 
- -    0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f, 
- -    0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 
- -    0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 
- -    0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, 
- -    0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f, 
- -    0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f, 
- -    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, 
- -    0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, 
- -    0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f, 
- -    0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 
- -    0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 
- -    0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f, 
- -    0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f, 
- -    0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f, 
- -    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, 
- -    0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, 
- -    0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f, 
- -    0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 
- -    0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 
- -    0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, 
- -    0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f, 
- -    0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f, 
- -    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, 
- -    0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, 
- -    0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f, 
- -    0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 
- -    0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 
- -    0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, 
- -    0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f, 
- -    0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f, 
- -    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, 
- -    0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, 
- -    0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f, 
- -    0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 
- -    0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 
- -    0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, 
- -    0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f, 
- -    0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f, 
- -    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, 
- -    0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, 
++    0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f,
++    0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f,
++    0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f,
++    0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
++    0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f,
++    0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
++    0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
++    0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
++    0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f,
++    0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f,
++    0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f,
++    0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
++    0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f,
++    0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
++    0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
++    0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
++    0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f,
++    0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f,
++    0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f,
++    0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
++    0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f,
++    0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f,
++    0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f,
++    0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
++    0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f,
++    0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f,
++    0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f,
++    0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
++    0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f,
++    0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f,
++    0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f,
++    0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
++    0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f,
++    0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f,
++    0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f,
++    0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
++    0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f,
++    0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f,
++    0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f,
++    0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
++    0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f,
++    0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f,
++    0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
+     /* interp_weight lut */
++    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
++    0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
++    0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f,
++    0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
++    0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f,
++    0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f,
++    0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f,
++    0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
++    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
++    0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
++    0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f,
++    0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
++    0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f,
++    0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f,
++    0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f,
++    0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
++    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
++    0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
++    0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f,
++    0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
++    0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f,
++    0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f,
++    0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f,
++    0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
++    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f,
++    0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f,
++    0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f,
++    0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
++    0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f,
++    0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f,
++    0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f,
++    0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
++    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f,
++    0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f,
++    0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f,
++    0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
++    0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f,
++    0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f,
++    0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f,
++    0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
++    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f,
++    0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f,
+     0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f
+ };
+ 
   namespace cv
   {
       namespace ocl
@@@ -86,30 -180,35 +180,35 @@@ namespace c
                                         int nblocks_win_x, int nblocks_win_y);
   
                   void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
-                                    int height, int width, const cv::ocl::oclMat &grad,
-                                    const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists);
+                                    int height, int width, float sigma, const cv::ocl::oclMat &grad,
- -                                   const cv::ocl::oclMat &qangle, 
++                                   const cv::ocl::oclMat &qangle,
+                                    const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists);
   
                   void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
-                                      int height, int width, cv::ocl::oclMat &block_hists, float threshold);
- -                                     int height, int width, cv::ocl::oclMat &block_hists, 
++                                     int height, int width, cv::ocl::oclMat &block_hists,
+                                      float threshold);
   
                   void classify_hists(int win_height, int win_width, int block_stride_y,
-                                     int block_stride_x, int win_stride_y, int win_stride_x, int height,
-                                     int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
- -                                    int block_stride_x, int win_stride_y, int win_stride_x, 
- -                                    int height, int width, const cv::ocl::oclMat &block_hists, 
++                                    int block_stride_x, int win_stride_y, int win_stride_x,
++                                    int height, int width, const cv::ocl::oclMat &block_hists,
+                                     const cv::ocl::oclMat &coefs, float free_coef,
                                       float threshold, cv::ocl::oclMat &labels);
   
-                 void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
-                                             int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
- -                void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, 
- -                                            int block_stride_x, int win_stride_y, int win_stride_x, 
++                void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y,
++                                            int block_stride_x, int win_stride_y, int win_stride_x,
+                                             int height, int width, const cv::ocl::oclMat &block_hists,
                                               cv::ocl::oclMat &descriptors);
-                 void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
-                                             int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
- -                void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, 
- -                                            int block_stride_x, int win_stride_y, int win_stride_x, 
++                void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y,
++                                            int block_stride_x, int win_stride_y, int win_stride_x,
+                                             int height, int width, const cv::ocl::oclMat &block_hists,
                                               cv::ocl::oclMat &descriptors);
   
                   void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
-                                             float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
- -                                            float angle_scale, cv::ocl::oclMat &grad, 
++                                            float angle_scale, cv::ocl::oclMat &grad,
+                                             cv::ocl::oclMat &qangle, bool correct_gamma);
                   void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
-                                             float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
- 
-                 void resize( const oclMat &src, oclMat &dst, const Size sz);
- -                                            float angle_scale, cv::ocl::oclMat &grad, 
++                                            float angle_scale, cv::ocl::oclMat &grad,
+                                             cv::ocl::oclMat &qangle, bool correct_gamma);
               }
           }
       }
@@@ -117,8 -216,14 +216,14 @@@
   
   using namespace ::cv::ocl::device;
   
- cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
-                                       int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
+ static inline int divUp(int total, int grain)
+ {
+     return (total + grain - 1) / grain;
+ }
+ 
- -cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, 
- -                                      Size cell_size_, int nbins_, double win_sigma_, 
++cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
++                                      Size cell_size_, int nbins_, double win_sigma_,
+                                       double threshold_L2hys_, bool gamma_correction_, int nlevels_)
       : win_size(win_size_),
         block_size(block_size_),
         block_stride(block_stride_),
@@@ -132,19 -237,27 +237,27 @@@
       CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 &&
                 (win_size.height - block_size.height) % block_stride.height == 0);
   
-     CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);
- -    CV_Assert(block_size.width % cell_size.width == 0 && 
++    CV_Assert(block_size.width % cell_size.width == 0 &&
+         block_size.height % cell_size.height == 0);
   
       CV_Assert(block_stride == cell_size);
   
       CV_Assert(cell_size == Size(8, 8));
   
-     Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
- -    Size cells_per_block(block_size.width / cell_size.width, 
++    Size cells_per_block(block_size.width / cell_size.width,
+         block_size.height / cell_size.height);
       CV_Assert(cells_per_block == Size(2, 2));
   
       cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-     hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
- -    hog::set_up_constants(nbins, block_stride.width, block_stride.height, 
++    hog::set_up_constants(nbins, block_stride.width, block_stride.height,
+         blocks_per_win.width, blocks_per_win.height);
   
       effect_size = Size(0, 0);
+ 
+       if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
+         hog_device_cpu = true;
+     else
+         hog_device_cpu = false;
   }
   
   size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@@@ -154,7 -267,8 +267,8 @@@
   
   size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
   {
-     Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
- -    Size cells_per_block = Size(block_size.width / cell_size.width, 
++    Size cells_per_block = Size(block_size.width / cell_size.width,
+         block_size.height / cell_size.height);
       return (size_t)(nbins * cells_per_block.area());
   }
   
@@@ -167,10 -281,11 +281,11 @@@ bool cv::ocl::HOGDescriptor::checkDetec
   {
       size_t detector_size = detector.rows * detector.cols;
       size_t descriptor_size = getDescriptorSize();
-     return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
- -    return detector_size == 0 || detector_size == descriptor_size || 
++    return detector_size == 0 || detector_size == descriptor_size ||
+         detector_size == descriptor_size + 1;
   }
   
- -void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &_detector)
+ +void cv::ocl::HOGDescriptor::setSVMDetector(const std::vector<float> &_detector)
   {
       std::vector<float> detector_reordered(_detector.size());
   
@@@ -207,10 -322,16 +322,16 @@@ void cv::ocl::HOGDescriptor::init_buffe
   
       const size_t block_hist_size = getBlockHistogramSize();
       const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
-     block_hists.create(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F);
- -    block_hists.create(1, 
++    block_hists.create(1,
+         static_cast<int>(block_hist_size * blocks_per_img.area()) + 256, CV_32F);
   
       Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
       labels.create(1, wins_per_img.area(), CV_8U);
- -    vector<float> v_lut = vector<float>(gaussian_interp_lut, gaussian_interp_lut + 
+ 
++    std::vector<float> v_lut = std::vector<float>(gaussian_interp_lut, gaussian_interp_lut +
+         sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0]));
+     Mat m_lut(v_lut);
+     gauss_w_lut.upload(m_lut.reshape(1,1));
   }
   
   void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
@@@ -221,10 -342,12 +342,12 @@@
       switch (img.type())
       {
       case CV_8UC1:
-         hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
- -        hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, 
++        hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img,
+             angleScale, grad, qangle, gamma_correction);
           break;
       case CV_8UC4:
-         hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction);
- -        hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, 
++        hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img,
+             angleScale, grad, qangle, gamma_correction);
           break;
       }
   }
@@@ -232,19 -355,21 +355,21 @@@
   
   void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
   {
-     computeGradient(img, grad, qangle);
+     computeGradient(img, this->grad, this->qangle);
   
-     hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
-                        grad, qangle, (float)getWinSigma(), block_hists);
- -    hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, 
++    hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
+         effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists);
   
-     hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
-                          block_hists, (float)threshold_L2hys);
- -    hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, 
++    hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height,
+         effect_size.width, block_hists, (float)threshold_L2hys);
   }
   
   
- void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format)
- -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, 
++void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride,
+                                             oclMat &descriptors, int descr_format)
   {
-     CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
- -    CV_Assert(win_stride.width % block_stride.width == 0 && 
++    CV_Assert(win_stride.width % block_stride.width == 0 &&
+         win_stride.height % block_stride.height == 0);
   
       init_buffer(img, win_stride);
   
@@@ -254,25 -379,29 +379,29 @@@
       Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
       Size wins_per_img   = numPartsWithin(effect_size, win_size, win_stride);
   
-     descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
- -    descriptors.create(wins_per_img.area(), 
++    descriptors.create(wins_per_img.area(),
+         static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
   
       switch (descr_format)
       {
       case DESCR_FORMAT_ROW_BY_ROW:
-         hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                     win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
- -        hog::extract_descrs_by_rows(win_size.height, win_size.width, 
- -            block_stride.height, block_stride.width, win_stride.height, win_stride.width, 
++        hog::extract_descrs_by_rows(win_size.height, win_size.width,
++            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
+             effect_size.height, effect_size.width, block_hists, descriptors);
           break;
       case DESCR_FORMAT_COL_BY_COL:
-         hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                     win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
- -        hog::extract_descrs_by_cols(win_size.height, win_size.width, 
- -            block_stride.height, block_stride.width, win_stride.height, win_stride.width, 
++        hog::extract_descrs_by_cols(win_size.height, win_size.width,
++            block_stride.height, block_stride.width, win_stride.height, win_stride.width,
+             effect_size.height, effect_size.width, block_hists, descriptors);
           break;
       default:
- -        CV_Error(CV_StsBadArg, "Unknown descriptor format");
+ +        CV_Error(Error::StsBadArg, "Unknown descriptor format");
       }
   }
   
   
- void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits, double hit_threshold, Size win_stride, Size padding)
- -void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits, 
++void cv::ocl::HOGDescriptor::detect(const oclMat &img, std::vector<Point> &hits,
+                                     double hit_threshold, Size win_stride, Size padding)
   {
       CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
       CV_Assert(padding == Size(0, 0));
@@@ -284,14 -413,16 +413,16 @@@
       if (win_stride == Size())
           win_stride = block_stride;
       else
-         CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
- -        CV_Assert(win_stride.width % block_stride.width == 0 && 
++        CV_Assert(win_stride.width % block_stride.width == 0 &&
+             win_stride.height % block_stride.height == 0);
       init_buffer(img, win_stride);
   
       computeBlockHistograms(img);
   
-     hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                         win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists,
-                         detector, (float)free_coef, (float)hit_threshold, labels);
- -    hog::classify_hists(win_size.height, win_size.width, block_stride.height, 
- -        block_stride.width, win_stride.height, win_stride.width, 
- -        effect_size.height, effect_size.width, block_hists, detector, 
++    hog::classify_hists(win_size.height, win_size.width, block_stride.height,
++        block_stride.width, win_stride.height, win_stride.width,
++        effect_size.height, effect_size.width, block_hists, detector,
+         (float)free_coef, (float)hit_threshold, labels);
   
       labels.download(labels_host);
       unsigned char *vec = labels_host.ptr();
@@@ -307,8 -438,9 +438,9 @@@
   
   
   
- void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations, double hit_threshold,
-         Size win_stride, Size padding, double scale0, int group_threshold)
- -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &found_locations, 
- -                                              double hit_threshold, Size win_stride, Size padding, 
++void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
++                                              double hit_threshold, Size win_stride, Size padding,
+                                               double scale0, int group_threshold)
   {
       CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
       CV_Assert(scale0 > 1);
@@@ -334,7 -466,8 +466,8 @@@
       if (win_stride == Size())
           win_stride = block_stride;
       else
-         CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
- -        CV_Assert(win_stride.width % block_stride.width == 0 && 
++        CV_Assert(win_stride.width % block_stride.width == 0 &&
+             win_stride.height % block_stride.height == 0);
       init_buffer(img, win_stride);
       image_scale.create(img.size(), img.type());
   
@@@ -348,12 -481,14 +481,13 @@@
           }
           else
           {
-             hog::resize( img, image_scale, effect_size);
+             resize(img, image_scale, effect_size);
               detect(image_scale, locations, hit_threshold, win_stride, padding);
           }
-         Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
- -        Size scaled_win_size(cvRound(win_size.width * scale), 
++        Size scaled_win_size(cvRound(win_size.width * scale),
+             cvRound(win_size.height * scale));
           for (size_t j = 0; j < locations.size(); j++)
- -            all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, 
- -              scaled_win_size));
+ +            all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
       }
   
       found_locations.assign(all_candidates.begin(), all_candidates.end());
@@@ -365,9 -500,11 +499,11 @@@ int cv::ocl::HOGDescriptor::numPartsWit
       return (size - part_size + stride) / stride;
   }
   
- cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
- -cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, 
++cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size,
+                                                 cv::Size stride)
   {
-     return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
- -    return Size(numPartsWithin(size.width, part_size.width, stride.width), 
++    return Size(numPartsWithin(size.width, part_size.width, stride.width),
+         numPartsWithin(size.height, part_size.height, stride.height));
   }
   
   std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
@@@ -1548,8 -1685,9 +1684,9 @@@ static int power_2up(unsigned int n
       return -1; // Input is too big
   }
   
- void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y,
-         int nblocks_win_x, int nblocks_win_y)
- -void cv::ocl::device::hog::set_up_constants(int nbins, 
- -                                            int block_stride_x, int block_stride_y, 
++void cv::ocl::device::hog::set_up_constants(int nbins,
++                                            int block_stride_x, int block_stride_y,
+                                             int nblocks_win_x, int nblocks_win_y)
   {
       cnbins = nbins;
       cblock_stride_x = block_stride_x;
@@@ -1570,19 -1706,24 +1705,23 @@@
       cdescr_size = descr_size;
   }
   
- void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
-         int height, int width, const cv::ocl::oclMat &grad,
-         const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
- -void cv::ocl::device::hog::compute_hists(int nbins, 
++void cv::ocl::device::hog::compute_hists(int nbins,
+                                          int block_stride_x, int block_stride_y,
- -                                         int height, int width, float sigma, 
- -                                         const cv::ocl::oclMat &grad, 
- -                                         const cv::ocl::oclMat &qangle, 
- -                                         const cv::ocl::oclMat &gauss_w_lut, 
++                                         int height, int width, float sigma,
++                                         const cv::ocl::oclMat &grad,
++                                         const cv::ocl::oclMat &qangle,
++                                         const cv::ocl::oclMat &gauss_w_lut,
+                                          cv::ocl::oclMat &block_hists)
   {
       Context *clCxt = Context::getContext();
-     String kernelName = "compute_hists_kernel";
- -    vector< pair<size_t, const void *> > args;
- -    string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" : 
+ +    std::vector< std::pair<size_t, const void *> > args;
++    String kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" :
+         "compute_hists_kernel";
   
-     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
-     int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
- 
-     size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 };
-     size_t localThreads[3] = { 32, 2, 1 };
- -    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) 
++    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
+         / block_stride_x;
- -    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) 
++    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y)
+         / block_stride_y;
- -    int blocks_total = img_block_width * img_block_height;
   
       int grad_quadstep = grad.step >> 2;
       int qangle_step = qangle.step;
@@@ -1590,163 -1731,241 +1729,239 @@@
       // Precompute gaussian spatial window parameter
       float scale = 1.f / (2.f * sigma * sigma);
   
- -    size_t globalThreads[3] = { 
+     int blocks_in_group = 4;
+     size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
++    size_t globalThreads[3] = {
+         divUp(img_block_width * img_block_height, blocks_in_group) * localThreads[0], 2, 1 };
+ 
       int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
       int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
- -    int smem = (hists_size + final_hists_size) * blocks_in_group;
- -
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_in_group));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_total));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data));
+ +    int smem = hists_size + final_hists_size;
+ +
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
-     args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
+     if (kernelName.compare("compute_hists_lut_kernel") == 0)
- -        args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
++        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data));
+     else
- -        args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
- -    args.push_back( make_pair( smem, (void *)NULL));
++        args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
+ +    args.push_back( std::make_pair( smem, (void *)NULL));
   
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
++    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+         localThreads, args, -1, -1);
   }
   
- void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
-         int height, int width, cv::ocl::oclMat &block_hists, float threshold)
- -void cv::ocl::device::hog::normalize_hists(int nbins, 
++void cv::ocl::device::hog::normalize_hists(int nbins,
+                                            int block_stride_x, int block_stride_y,
- -                                           int height, int width, 
- -                                           cv::ocl::oclMat &block_hists, 
++                                           int height, int width,
++                                           cv::ocl::oclMat &block_hists,
+                                            float threshold)
   {
       Context *clCxt = Context::getContext();
-     String kernelName = "normalize_hists_kernel";
- -    vector< pair<size_t, const void *> > args;
- -    string kernelName;
+ +    std::vector< std::pair<size_t, const void *> > args;
++    String kernelName;
   
       int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
- -    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) 
- -        / block_stride_x;
- -    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) 
- -        / block_stride_y;
- -    int nthreads;
- -    size_t globalThreads[3] = { 1, 1, 1  };
- -    size_t localThreads[3] = { 1, 1, 1  };
- -    
+ +    int nthreads = power_2up(block_hist_size);
+ +
+ +    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
+ +    int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
+ +    size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 };
+ +    size_t localThreads[3] = { nthreads, 1, 1  };
+ +
-     if ((nthreads < 32) || (nthreads > 512) )
-         cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big", "cv::ocl::device::hog::normalize_hists", __FILE__, __LINE__);
+     if ( nbins == 9 )
+     {
+         /* optimized for the case of 9 bins */
+         kernelName = "normalize_hists_36_kernel";
+         int blocks_in_group = NTHREADS / block_hist_size;
+         nthreads = blocks_in_group * block_hist_size;
+         int num_groups = divUp( img_block_width * img_block_height, blocks_in_group);
+         globalThreads[0] = nthreads * num_groups;
+         localThreads[0] = nthreads;
+     }
+     else
+     {
+         kernelName = "normalize_hists_kernel";
+         nthreads = power_2up(block_hist_size);
+         globalThreads[0] = img_block_width * nthreads;
+         globalThreads[1] = img_block_height;
+         localThreads[0] = nthreads;
+ 
+         if ((nthreads < 32) || (nthreads > 512) )
- -            cv::ocl::error("normalize_hists: histogram's size is too small or too big", 
- -                __FILE__, __LINE__, "normalize_hists");
++            cv::error(Error::StsBadArg, "normalize_hists: histogram's size is too small or too big",
++                "normalize_hists", __FILE__, __LINE__);
+ 
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads));
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size));
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+     }
   
-     args.push_back( std::make_pair( sizeof(cl_int), (void *)&nthreads));
-     args.push_back( std::make_pair( sizeof(cl_int), (void *)&block_hist_size));
-     args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
- -    args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
+ +    args.push_back( std::make_pair( nthreads * sizeof(float), (void *)NULL));
   
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+     if(hog_device_cpu)
- -        openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
++        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                              localThreads, args, -1, -1, "-D CPU");
+     else
- -        openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
++        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                              localThreads, args, -1, -1);
   }
   
- void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
-         int block_stride_x, int win_stride_y, int win_stride_x, int height,
-         int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
-         float threshold, cv::ocl::oclMat &labels)
- -void cv::ocl::device::hog::classify_hists(int win_height, int win_width, 
- -                                          int block_stride_y, int block_stride_x, 
- -                                          int win_stride_y, int win_stride_x, 
- -                                          int height, int width, 
- -                                          const cv::ocl::oclMat &block_hists, 
- -                                          const cv::ocl::oclMat &coefs, 
- -                                          float free_coef, float threshold, 
++void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
++                                          int block_stride_y, int block_stride_x,
++                                          int win_stride_y, int win_stride_x,
++                                          int height, int width,
++                                          const cv::ocl::oclMat &block_hists,
++                                          const cv::ocl::oclMat &coefs,
++                                          float free_coef, float threshold,
+                                           cv::ocl::oclMat &labels)
   {
       Context *clCxt = Context::getContext();
-     String kernelName = "classify_hists_kernel";
- -    vector< pair<size_t, const void *> > args;
+ +    std::vector< std::pair<size_t, const void *> > args;
   
- -    string kernelName;
+     int nthreads;
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width));
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height));
++    String kernelName;
+     switch (cdescr_width)
+     {
+     case 180:
+         nthreads = 180;
+         kernelName = "classify_hists_180_kernel";
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width));
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
+         break;
+     case 252:
+         nthreads = 256;
+         kernelName = "classify_hists_252_kernel";
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size));
- -        args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_height));
+         break;
+     default:
+         nthreads = 256;
+         kernelName = "classify_hists_kernel";
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
++        args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+     }
+ 
       int win_block_stride_x = win_stride_x / block_stride_x;
       int win_block_stride_y = win_stride_y / block_stride_y;
       int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
       int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
- 
-     size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
-     size_t localThreads[3] = { NTHREADS, 1, 1 };
- -    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / 
++    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+         block_stride_x;
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&coefs.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&free_coef));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data));
+     size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
+     size_t localThreads[3] = { nthreads, 1, 1 };
-     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
-     args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_win_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coefs.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&free_coef));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&threshold));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&labels.data));
   
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
+     if(hog_device_cpu)
- -        openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
++        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                              localThreads, args, -1, -1, "-D CPU");
+     else
- -        openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
++        openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+                              localThreads, args, -1, -1);
   }
   
- void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
-         int win_stride_y, int win_stride_x, int height, int width,
-         const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
- -void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, 
++void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
+                                                   int block_stride_y, int block_stride_x,
- -                                                  int win_stride_y, int win_stride_x, 
++                                                  int win_stride_y, int win_stride_x,
+                                                   int height, int width,
- -                                                  const cv::ocl::oclMat &block_hists, 
++                                                  const cv::ocl::oclMat &block_hists,
+                                                   cv::ocl::oclMat &descriptors)
   {
       Context *clCxt = Context::getContext();
- -    string kernelName = "extract_descrs_by_rows_kernel";
- -    vector< pair<size_t, const void *> > args;
+ +    String kernelName = "extract_descrs_by_rows_kernel";
+ +    std::vector< std::pair<size_t, const void *> > args;
   
       int win_block_stride_x = win_stride_x / block_stride_x;
       int win_block_stride_y = win_stride_y / block_stride_y;
       int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
       int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
- -    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / 
++    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+         block_stride_x;
       int descriptors_quadstep = descriptors.step >> 2;
   
       size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
       size_t localThreads[3] = { NTHREADS, 1, 1 };
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ +
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
++    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+         localThreads, args, -1, -1);
   }
   
- void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
-         int win_stride_y, int win_stride_x, int height, int width,
-         const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
- -void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, 
++void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
+                                                   int block_stride_y, int block_stride_x,
- -                                                  int win_stride_y, int win_stride_x, 
++                                                  int win_stride_y, int win_stride_x,
+                                                   int height, int width,
- -                                                  const cv::ocl::oclMat &block_hists, 
++                                                  const cv::ocl::oclMat &block_hists,
+                                                   cv::ocl::oclMat &descriptors)
   {
       Context *clCxt = Context::getContext();
- -    string kernelName = "extract_descrs_by_cols_kernel";
- -    vector< pair<size_t, const void *> > args;
+ +    String kernelName = "extract_descrs_by_cols_kernel";
+ +    std::vector< std::pair<size_t, const void *> > args;
   
       int win_block_stride_x = win_stride_x / block_stride_x;
       int win_block_stride_y = win_stride_y / block_stride_y;
       int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
       int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
-     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
- -    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / 
++    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+         block_stride_x;
       int descriptors_quadstep = descriptors.step >> 2;
   
       size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 };
       size_t localThreads[3] = { NTHREADS, 1, 1 };
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_x));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_y));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cdescr_size));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnblocks_win_x));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnblocks_win_y));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_block_width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_x));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&win_block_stride_y));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&block_hists.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
+ +
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
++    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+         localThreads, args, -1, -1);
   }
   
- static inline int divUp(int total, int grain)
- {
-     return (total + grain - 1) / grain;
- }
- 
- void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
-         float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
- -void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, 
++void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
+                                                   const cv::ocl::oclMat &img,
- -                                                  float angle_scale, 
- -                                                  cv::ocl::oclMat &grad, 
- -                                                  cv::ocl::oclMat &qangle, 
++                                                  float angle_scale,
++                                                  cv::ocl::oclMat &grad,
++                                                  cv::ocl::oclMat &qangle,
+                                                   bool correct_gamma)
   {
       Context *clCxt = Context::getContext();
- -    string kernelName = "compute_gradients_8UC1_kernel";
- -    vector< pair<size_t, const void *> > args;
+ +    String kernelName = "compute_gradients_8UC1_kernel";
+ +    std::vector< std::pair<size_t, const void *> > args;
   
       size_t localThreads[3] = { NTHREADS, 1, 1 };
       size_t globalThreads[3] = { width, height, 1 };
@@@ -1755,27 -1974,32 +1970,32 @@@
       int grad_quadstep = grad.step >> 3;
       int qangle_step = qangle.step >> 1;
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&height));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_step));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale));
- -    args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
+ +    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
+ +
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
++    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+         localThreads, args, -1, -1);
   }
   
- void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
-         float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
- -void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, 
++void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
+                                                   const cv::ocl::oclMat &img,
- -                                                  float angle_scale, 
- -                                                  cv::ocl::oclMat &grad, 
- -                                                  cv::ocl::oclMat &qangle, 
++                                                  float angle_scale,
++                                                  cv::ocl::oclMat &grad,
++                                                  cv::ocl::oclMat &qangle,
+                                                   bool correct_gamma)
   {
       Context *clCxt = Context::getContext();
- -    string kernelName = "compute_gradients_8UC4_kernel";
- -    vector< pair<size_t, const void *> > args;
+ +    String kernelName = "compute_gradients_8UC4_kernel";
+ +    std::vector< std::pair<size_t, const void *> > args;
   
       size_t localThreads[3] = { NTHREADS, 1, 1 };
       size_t globalThreads[3] = { width, height, 1 };
@@@ -1785,49 -2009,18 +2005,18 @@@
       int grad_quadstep = grad.step >> 3;
       int qangle_step = qangle.step >> 1;
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&height));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_step));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale));
- -    args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, 
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
+ +    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
+ +
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
- }
- 
- void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
- {
-     CV_Assert( (src.channels() == dst.channels()) );
-     Context *clCxt = Context::getContext();
- 
-     String kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel";
-     size_t blkSizeX = 16, blkSizeY = 16;
-     size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX;
-     size_t glbSizeY = sz.height % blkSizeY == 0 ? sz.height : (sz.height / blkSizeY + 1) * blkSizeY;
-     size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
-     size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
- 
-     float ifx = (float)src.cols / sz.width;
-     float ify = (float)src.rows / sz.height;
- 
-     std::vector< std::pair<size_t, const void *> > args;
-     args.push_back( std::make_pair(sizeof(cl_mem), (void *)&dst.data));
-     args.push_back( std::make_pair(sizeof(cl_mem), (void *)&src.data));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.offset));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.offset));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&dst.step));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.step));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.cols));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&src.rows));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.width));
-     args.push_back( std::make_pair(sizeof(cl_int), (void *)&sz.height));
-     args.push_back( std::make_pair(sizeof(cl_float), (void *)&ifx));
-     args.push_back( std::make_pair(sizeof(cl_float), (void *)&ify));
- 
-     openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
++    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
+         localThreads, args, -1, -1);
- -}
+ +}
diff --cc modules/ocl/src/matrix_operations.cpp

index 4f23789,1ff963a..e6af56d
--- 1/modules/ocl/src/matrix_operations.cpp
--- 2/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@@ -213,8 -225,37 +214,36 @@@ void cv::ocl::oclMat::upload(const Mat 
       rows = m.rows;
       cols = m.cols;
       offset = ofs.y * step + ofs.x * elemSize();
- -    //download_channels = m.channels();
   }
   
+ cv::ocl::oclMat::operator cv::_InputArray()
+ {
+     _InputArray newInputArray;
+     newInputArray.flags = cv::_InputArray::OCL_MAT;
+     newInputArray.obj   = reinterpret_cast<void *>(this);
+     return newInputArray;
+ }
+ 
+ cv::ocl::oclMat::operator cv::_OutputArray()
+ {
+     _OutputArray newOutputArray;
+     newOutputArray.flags = cv::_InputArray::OCL_MAT;
+     newOutputArray.obj   = reinterpret_cast<void *>(this);
+     return newOutputArray;
+ }
+ 
+ cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
+ {
+     CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
+     return *reinterpret_cast<oclMat*>(src.obj);
+ }
+ 
+ cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
+ {
+     CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
+     return *reinterpret_cast<oclMat*>(src.obj);
+ }
+ 
   void cv::ocl::oclMat::download(cv::Mat &m) const
   {
       CV_DbgAssert(!this->empty());
diff --cc modules/ocl/src/mcwutil.cpp

index 27f8d26,75314fb..d4fd47b
--- 1/modules/ocl/src/mcwutil.cpp
--- 2/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@@ -78,9 -78,9 +78,9 @@@ namespace c
           }
   
           // provide additional methods for the user to interact with the command queue after a task is fired
- -        static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- -                                   size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
- -                                   int depth, char *build_options, FLUSH_MODE finish_mode)
+ +        static void openCLExecuteKernel_2(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
+ +                                   size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
-                                    int depth, char *build_options, FLUSH_MODE finish_mode)
++                                   int depth, const char *build_options, FLUSH_MODE finish_mode)
           {
               //construct kernel name
               //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
@@@ -131,9 -131,9 +131,9 @@@
               openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
                                    channels, depth, NULL, finish_mode);
           }
- -        void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
+ +        void openCLExecuteKernel2(Context *clCxt , const char **source, String kernelName,
                                     size_t globalThreads[3], size_t localThreads[3],
-                                   std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
- -                                  vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
++                                  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
   
           {
               openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
diff --cc modules/ocl/src/opencl/objdetect_hog.cl

index 64ae3ea,05d5383..509cf13
--- 1/modules/ocl/src/opencl/objdetect_hog.cl
--- 2/modules/ocl/src/opencl/objdetect_hog.cl
+++ b/modules/ocl/src/opencl/objdetect_hog.cl
@@@ -54,9 -53,103 +53,103 @@@
   //----------------------------------------------------------------------------
   // Histogram computation
   // 12 threads for a cell, 12x4 threads per block
+ // Use pre-computed gaussian and interp_weight lookup tables if sigma is 4.0f
+ __kernel void compute_hists_lut_kernel(
+     const int cblock_stride_x, const int cblock_stride_y,
+     const int cnbins, const int cblock_hist_size, const int img_block_width, 
+     const int blocks_in_group, const int blocks_total,
+     const int grad_quadstep, const int qangle_step,
+     __global const float* grad, __global const uchar* qangle,
+     __global const float* gauss_w_lut,
+     __global float* block_hists, __local float* smem)
+ {
+     const int lx = get_local_id(0);
+     const int lp = lx / 24; /* local group id */
+     const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */
+     const int gidY = gid / img_block_width;
+     const int gidX = gid - gidY * img_block_width;
+ 
+     const int lidX = lx - lp * 24;
+     const int lidY = get_local_id(1);
+ 
+     const int cell_x = lidX / 12;
+     const int cell_y = lidY;
+     const int cell_thread_x = lidX - cell_x * 12;
+ 
+     __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * 
+         CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
+     __local float* final_hist = hists + cnbins * 
+         (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);
+ 
+     const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
+     const int offset_y = gidY * cblock_stride_y + (cell_y << 2);
+ 
+     __global const float* grad_ptr = (gid < blocks_total) ? 
+         grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
+     __global const uchar* qangle_ptr = (gid < blocks_total) ?
+         qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
+ 
+     __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + 
+         cell_thread_x;
+     for (int bin_id = 0; bin_id < cnbins; ++bin_id)
+         hist[bin_id * 48] = 0.f;
+ 
+     const int dist_x = -4 + cell_thread_x - 4 * cell_x;
+     const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
+ 
+     const int dist_y_begin = -4 - 4 * lidY;
+     for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
+     {
+         float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
+         uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
+ 
+         grad_ptr += grad_quadstep;
+         qangle_ptr += qangle_step;
+ 
+         int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
+ 
+         int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8);
+         float gaussian = gauss_w_lut[idx];
+         idx = (dist_y + 8) * 16 + (dist_x + 8);
+         float interp_weight = gauss_w_lut[256+idx];
+ 
+         hist[bin.x * 48] += gaussian * interp_weight * vote.x;
+         hist[bin.y * 48] += gaussian * interp_weight * vote.y;
+     }
+     barrier(CLK_LOCAL_MEM_FENCE);
+ 
+     volatile __local float* hist_ = hist;
+     for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
+     {
+         if (cell_thread_x < 6)
+             hist_[0] += hist_[6];
+         barrier(CLK_LOCAL_MEM_FENCE);
+         if (cell_thread_x < 3)
+             hist_[0] += hist_[3];
+ #ifdef CPU
+         barrier(CLK_LOCAL_MEM_FENCE);
+ #endif
+         if (cell_thread_x == 0)
+             final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = 
+                 hist_[0] + hist_[1] + hist_[2];
+     }
+     barrier(CLK_LOCAL_MEM_FENCE);
+ 
+     int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
+     if ((tid < cblock_hist_size) && (gid < blocks_total))
+     {
+         __global float* block_hist = block_hists + 
+             (gidY * img_block_width + gidX) * cblock_hist_size;
+         block_hist[tid] = final_hist[tid];
+     }
+ }
+ 
+ //----------------------------------------------------------------------------
+ // Histogram computation
+ // 12 threads for a cell, 12x4 threads per block
   __kernel void compute_hists_kernel(
       const int cblock_stride_x, const int cblock_stride_y,
- -    const int cnbins, const int cblock_hist_size, const int img_block_width, 
+ +    const int cnbins, const int cblock_hist_size, const int img_block_width,
       const int blocks_in_group, const int blocks_total,
       const int grad_quadstep, const int qangle_step,
       __global const float* grad, __global const uchar* qangle,
@@@ -129,12 -222,10 +222,10 @@@
           barrier(CLK_LOCAL_MEM_FENCE);
   #endif
           if (cell_thread_x == 0)
- -            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = 
+ +            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
                   hist_[0] + hist_[1] + hist_[2];
       }
- #ifdef WAVE_SIZE_1
       barrier(CLK_LOCAL_MEM_FENCE);
- #endif
   
       int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
       if ((tid < cblock_hist_size) && (gid < blocks_total))
diff --cc modules/ocl/test/test_imgproc.cpp
Simple merge
diff --cc modules/ocl/test/test_objdetect.cpp

index e968d04,86590f7..ad35270
--- 1/modules/ocl/test/test_hog.cpp
--- 2/modules/ocl/test/test_objdetect.cpp
+++ b/modules/ocl/test/test_objdetect.cpp
@@@ -44,8 -44,11 +44,14 @@@
   //M*/
   
   #include "precomp.hpp"
- -#include "opencv2/core/core.hpp"
- -#include "opencv2/objdetect/objdetect.hpp"
   
++#include "opencv2/objdetect.hpp"
++#include "opencv2/objdetect/objdetect_c.h"
++
+ +using namespace std;
+ using namespace cv;
+ using namespace testing;
++
   #ifdef HAVE_OPENCL
   
   extern string workdir;
@@@ -71,19 -78,19 +81,19 @@@ TEST_P(HOG, GetDescriptors
       switch (type)
       {
       case CV_8UC1:
-         cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
- -        cvtColor(img_rgb, img, CV_BGR2GRAY);
++        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
           break;
       case CV_8UC4:
       default:
-         cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
- -        cvtColor(img_rgb, img, CV_BGR2BGRA);
++        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
           break;
       }
-     cv::ocl::oclMat d_img(img);
+     ocl::oclMat d_img(img);
   
       // HOGs
-     cv::ocl::HOGDescriptor ocl_hog;
+     ocl::HOGDescriptor ocl_hog;
       ocl_hog.gamma_correction = true;
-     cv::HOGDescriptor hog;
+     HOGDescriptor hog;
       hog.gammaCorrection = true;
   
       // Compute descriptor
@@@ -128,22 -124,22 +127,22 @@@ TEST_P(HOG, Detect
       switch (type)
       {
       case CV_8UC1:
-         cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
- -        cvtColor(img_rgb, img, CV_BGR2GRAY);
++        cvtColor(img_rgb, img, COLOR_BGR2GRAY);
           break;
       case CV_8UC4:
       default:
-         cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
- -        cvtColor(img_rgb, img, CV_BGR2BGRA);
++        cvtColor(img_rgb, img, COLOR_BGR2BGRA);
           break;
       }
-     cv::ocl::oclMat d_img(img);
+     ocl::oclMat d_img(img);
   
       // HOGs
-     if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128)))
-         winSize = cv::Size(64, 128);
-     cv::ocl::HOGDescriptor ocl_hog(winSize);
+     if ((winSize != Size(48, 96)) && (winSize != Size(64, 128)))
+         winSize = Size(64, 128);
+     ocl::HOGDescriptor ocl_hog(winSize);
       ocl_hog.gamma_correction = true;
   
-     cv::HOGDescriptor hog;
+     HOGDescriptor hog;
       hog.winSize = winSize;
       hog.gammaCorrection = true;
   
@@@ -181,72 -177,101 +180,103 @@@
           break;
       }
   
-     // Ground-truth rectangular people window
-     cv::Rect win1_64x128(231, 190, 72, 144);
-     cv::Rect win2_64x128(621, 156, 97, 194);
-     cv::Rect win1_48x96(238, 198, 63, 126);
-     cv::Rect win2_48x96(619, 161, 92, 185);
-     cv::Rect win3_48x96(488, 136, 56, 112);
- 
-     // Compare whether ground-truth windows are detected and compare the number of windows detected.
-     std::vector<int> d_comp(4);
-     std::vector<int> comp(4);
-     for(int i = 0; i < (int)d_comp.size(); i++)
-     {
-         d_comp[i] = 0;
-         comp[i] = 0;
-     }
+     EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0);
+ }
+ 
   
-     int threshold = 10;
-     int val = 32;
-     d_comp[0] = (int)d_found.size();
-     comp[0] = (int)found.size();
-     if (winSize == cv::Size(48, 96))
+ INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
+                             testing::Values(Size(64, 128), Size(48, 96)),
+                             testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
+ 
++#if 0
+ ///////////////////////////// Haar //////////////////////////////
+ IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
+ CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
+ CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
+ struct getRect
+ {
+     Rect operator ()(const CvAvgComp &e) const
       {
-         for(int i = 0; i < (int)d_found.size(); i++)
-         {
-             if (match_rect(d_found[i], win1_48x96, threshold))
-                 d_comp[1] = val;
-             if (match_rect(d_found[i], win2_48x96, threshold))
-                 d_comp[2] = val;
-             if (match_rect(d_found[i], win3_48x96, threshold))
-                 d_comp[3] = val;
-         }
-         for(int i = 0; i < (int)found.size(); i++)
-         {
-             if (match_rect(found[i], win1_48x96, threshold))
-                 comp[1] = val;
-             if (match_rect(found[i], win2_48x96, threshold))
-                 comp[2] = val;
-             if (match_rect(found[i], win3_48x96, threshold))
-                 comp[3] = val;
-         }
+         return e.rect;
       }
-     else if (winSize == cv::Size(64, 128))
+ };
+ 
+ PARAM_TEST_CASE(Haar, int, CascadeName)
+ {
+     ocl::OclCascadeClassifier cascade, nestedCascade;
+     CascadeClassifier cpucascade, cpunestedCascade;
+ 
+     int flags;
+     std::string cascadeName;
+     vector<Rect> faces, oclfaces;
+     Mat img;
+     ocl::oclMat d_img;
+ 
+     virtual void SetUp()
       {
-         for(int i = 0; i < (int)d_found.size(); i++)
+         flags = GET_PARAM(0);
+         cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1));
+         if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
           {
-             if (match_rect(d_found[i], win1_64x128, threshold))
-                 d_comp[1] = val;
-             if (match_rect(d_found[i], win2_64x128, threshold))
-                 d_comp[2] = val;
+             std::cout << "ERROR: Could not load classifier cascade" << std::endl;
+             return;
           }
-         for(int i = 0; i < (int)found.size(); i++)
+         img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE);
+         if(img.empty())
           {
-             if (match_rect(found[i], win1_64x128, threshold))
-                 comp[1] = val;
-             if (match_rect(found[i], win2_64x128, threshold))
-                 comp[2] = val;
+             std::cout << "Couldn't read lena.jpg" << std::endl;
+             return ;
           }
+         equalizeHist(img, img);
+         d_img.upload(img);
       }
+ };
   
-     EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3);
+ TEST_P(Haar, FaceDetect)
+ {
+     MemStorage storage(cvCreateMemStorage(0));
+     CvSeq *_objects;
- -    _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3, 
++    _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3,
+                                             flags, Size(30, 30), Size(0, 0));
+     vector<CvAvgComp> vecAvgComp;
+     Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+     oclfaces.resize(vecAvgComp.size());
+     std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
- -    
++
+     cpucascade.detectMultiScale(img, faces,  1.1, 3,
+                                 flags,
+                                 Size(30, 30), Size(0, 0));
+ 
+     EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
   }
   
+ TEST_P(Haar, FaceDetectUseBuf)
+ {
+     ocl::OclCascadeClassifierBuf cascadebuf;
+     if(!cascadebuf.load(cascadeName))
+     {
+         std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << std::endl;
+         return;
+     }
+     cascadebuf.detectMultiScale(d_img, oclfaces,  1.1, 3,
+                                 flags,
+                                 Size(30, 30), Size(0, 0));
+     cpucascade.detectMultiScale(img, faces,  1.1, 3,
+                                 flags,
+                                 Size(30, 30), Size(0, 0));
+ 
+     // intentionally run ocl facedetect again and check if it still works after the first run
+     cascadebuf.detectMultiScale(d_img, oclfaces,  1.1, 3,
+                                 flags,
+                                 Size(30, 30));
+     cascadebuf.release();
   
- INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine(
-                             testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
-                             testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
+     EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0);
+ }
   
- -    Combine(Values(CV_HAAR_SCALE_IMAGE, 0), 
+ INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar,
++    Combine(Values(CV_HAAR_SCALE_IMAGE, 0),
+             Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/)));
++#endif
   
- -#endif //HAVE_OPENCL
+ +#endif //HAVE_OPENCL
diff --cc modules/ocl/test/utility.cpp
Simple merge
diff --cc modules/ocl/test/utility.hpp
Simple merge
diff --cc modules/ts/src/ts_func.cpp

index 0f3751e,9b6b535..3b1c7ca
--- 1/modules/ts/src/ts_func.cpp
--- 2/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@@ -2954,13 -2956,72 +2959,60 @@@ void printVersionInfo(bool useStdOut
       }
   
       pos1 = buildInfo.find("inner version");
-     pos2 = buildInfo.find("\n", pos1);\
+     pos2 = buildInfo.find('\n', pos1);
       if(pos1 != std::string::npos && pos2 != std::string::npos)
       {
-         std::string ver( buildInfo.substr(pos1, pos2-pos1) );
-         ::testing::Test::RecordProperty("inner_version", ver);
-         if(useStdOut) std::cout << ver << std::endl;
-     }
+         size_t value_start = buildInfo.rfind(' ', pos2) + 1;
+         std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
+         ::testing::Test::RecordProperty("cv_inner_vcs_version", ver);
+         if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl;
+     }
+ 
+ #ifdef CV_PARALLEL_FRAMEWORK
+     ::testing::Test::RecordProperty("cv_parallel_framework", CV_PARALLEL_FRAMEWORK);
+     if (useStdOut)
+     {
+         std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl;
+     }
+ #endif
+ 
+     std::string cpu_features;
+ 
+ #if CV_SSE
+     if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
+ #endif
+ #if CV_SSE2
+     if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2";
+ #endif
+ #if CV_SSE3
+     if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3";
+ #endif
+ #if CV_SSSE3
+     if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3";
+ #endif
+ #if CV_SSE4_1
+     if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1";
+ #endif
+ #if CV_SSE4_2
+     if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2";
+ #endif
+ #if CV_AVX
+     if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
+ #endif
+ #if CV_NEON
+     cpu_features += " neon"; // NEON is currently not checked at runtime
+ #endif
+ 
+     cpu_features.erase(0, 1); // erase initial space
+ 
+     ::testing::Test::RecordProperty("cv_cpu_features", cpu_features);
+     if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl;
+ 
+ #ifdef HAVE_TEGRA_OPTIMIZATION
+     const char * tegra_optimization = tegra::isDeviceSupported() ? "enabled" : "disabled";
+     ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
+     if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
+ #endif
   }
   
- -} //namespace cvtest
- -
- -void cvTsConvert( const CvMat* src, CvMat* dst )
- -{
- -    Mat _src = cvarrToMat(src), _dst = cvarrToMat(dst);
- -    cvtest::convert(_src, _dst, _dst.depth());
- -}
- -
- -void cvTsZero( CvMat* dst, const CvMat* mask )
- -{
- -    Mat _dst = cvarrToMat(dst), _mask = mask ? cvarrToMat(mask) : Mat();
- -    cvtest::set(_dst, Scalar::all(0), _mask);
   }
diff --cc samples/ocl/facedetect.cpp

index 5ffed2e,a49610a..4ab92af
--- 1/samples/ocl/facedetect.cpp
--- 2/samples/ocl/facedetect.cpp
+++ b/samples/ocl/facedetect.cpp
@@@ -36,22 -32,25 +39,24 @@@ static void workEnd(
   {
       work_end += (getTickCount() - work_begin);
   }
- 
- static double getTime(){
+ +
+ static double getTime()
+ {
       return work_end /((double)cvGetTickFrequency() * 1000.);
   }
   
- -
   void detect( Mat& img, vector<Rect>& faces,
-     cv::ocl::OclCascadeClassifierBuf& cascade,
-     double scale, bool calTime);
+              ocl::OclCascadeClassifierBuf& cascade,
+              double scale, bool calTime);
+ 
   
   void detectCPU( Mat& img, vector<Rect>& faces,
-     CascadeClassifier& cascade,
-     double scale, bool calTime);
+                 CascadeClassifier& cascade,
+                 double scale, bool calTime);
   
- -
   void Draw(Mat& img, vector<Rect>& faces, double scale);
   
+ 
   // This function test if gpu_rst matches cpu_rst.
   // If the two vectors are not equal, it will return the difference in vector size
   // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
@@@ -194,26 -205,27 +211,26 @@@ _cleanup_
   }
   
   void detect( Mat& img, vector<Rect>& faces,
-     cv::ocl::OclCascadeClassifierBuf& cascade,
-     double scale, bool calTime)
+              ocl::OclCascadeClassifierBuf& cascade,
+              double scale, bool calTime)
   {
-     cv::ocl::oclMat image(img);
-     cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+     ocl::oclMat image(img);
+     ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
       if(calTime) workBegin();
-     cv::ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
-     cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
-     cv::ocl::equalizeHist( smallImg, smallImg );
- -    ocl::cvtColor( image, gray, CV_BGR2GRAY );
++    ocl::cvtColor( image, gray, COLOR_BGR2GRAY );
+     ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+     ocl::equalizeHist( smallImg, smallImg );
   
       cascade.detectMultiScale( smallImg, faces, 1.1,
-         3, 0
-         |CV_HAAR_SCALE_IMAGE
-         , Size(30,30), Size(0, 0) );
+                               3, 0
+                               |CV_HAAR_SCALE_IMAGE
+                               , Size(30,30), Size(0, 0) );
       if(calTime) workEnd();
   }
   
- -
   void detectCPU( Mat& img, vector<Rect>& faces,
-     CascadeClassifier& cascade,
-     double scale, bool calTime)
+                 CascadeClassifier& cascade,
+                 double scale, bool calTime)
   {
       if(calTime) workBegin();
       Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
diff --cc samples/ocl/hog.cpp

index daff267,ff53e01..b26c98c
--- 1/samples/ocl/hog.cpp
--- 2/samples/ocl/hog.cpp
+++ b/samples/ocl/hog.cpp
@@@ -329,17 -224,18 +225,18 @@@ void App::run(
                       // verify if GPU output same objects with CPU at 1st run
                       verify = true;
                       vector<Rect> ref_rst;
- -                    cvtColor(img, img, CV_BGRA2BGR);
+ +                    cvtColor(img, img, COLOR_BGRA2BGR);
                       cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
-                                               Size(0, 0), scale, gr_threshold-2);
+                                              Size(0, 0), scale, gr_threshold-2);
                       double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
-                     cout << "\naccuracy value: " << accuracy << endl;           
-                 } 
-            }
+                     cout << "\naccuracy value: " << accuracy << endl;
+                 }
+             }
               else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                           Size(0, 0), scale, gr_threshold);
+                                               Size(0, 0), scale, gr_threshold);
               hogWorkEnd();
   
+ 
               // Draw positive classified windows
               for (size_t i = 0; i < found.size(); i++)
               {
@@@ -359,20 -254,27 +255,27 @@@
   
               workEnd();
   
-             if (args.write_video)
+             if (output!="")
               {
-                 if (!video_writer.isOpened())
+                 if (img_source!="")     // wirte image
                   {
-                     video_writer.open(args.dst_video, VideoWriter::fourcc('x','v','i','d'), args.dst_video_fps,
-                                       img_to_show.size(), true);
-                     if (!video_writer.isOpened())
-                         throw std::runtime_error("can't create video writer");
+                     imwrite(output, img_to_show);
                   }
- -                        video_writer.open(output, CV_FOURCC('x','v','i','d'), 24,
+                 else                    //write video
+                 {
+                     if (!video_writer.isOpened())
+                     {
++                        video_writer.open(output, VideoWriter::fourcc('x','v','i','d'), 24,
+                                           img_to_show.size(), true);
+                         if (!video_writer.isOpened())
+                             throw std::runtime_error("can't create video writer");
+                     }
   
-                 if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
-                 else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
- -                    if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
- -                    else cvtColor(img_to_show, img, CV_BGRA2BGR);
++                    if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
++                    else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
   
-                 video_writer << img;
+                     video_writer << img;
+                 }
               }
   
               handleKey((char)waitKey(3));
@@@ -511,10 -424,11 +425,10 @@@ double App::checkRectSimilarity(Size sz
           cv::Mat result_;
           multiply(cpu_result, gpu_result, result_);
           int result = cv::countNonZero(result_ > 0);
- 
-         final_test_result = 1.0 - (double)result/(double)cpu_area;
+         if(cpu_area!=0 && result!=0)
+             final_test_result = 1.0 - (double)result/(double)cpu_area;
+         else if(cpu_area==0 && result!=0)
+             final_test_result = -1;
       }
       return final_test_result;
- 
   }
--
diff --cc samples/ocl/pyrlk_optical_flow.cpp

index 3ce0edc,cefa928..f7d0661
--- 1/samples/ocl/pyrlk_optical_flow.cpp
--- 2/samples/ocl/pyrlk_optical_flow.cpp
+++ b/samples/ocl/pyrlk_optical_flow.cpp
@@@ -94,14 -94,15 +95,15 @@@ int main(int argc, const char* argv[]
       //set this to save kernel compile time from second time you run
       ocl::setBinpath("./");
       const char* keys =
-         "{ help h           | false | print help message }"
-         "{ left l           |       | specify left image }"
-         "{ right r          |       | specify right image }"
-         "{ camera c         | 0     | enable camera capturing }"
-         "{ use_cpu s        | false | use cpu or gpu to process the image }"
-         "{ video v          |       | use video as input }"
-         "{ points           | 1000  | specify points count [GoodFeatureToTrack] }"
-         "{ min_dist         | 0     | specify minimal distance between points [GoodFeatureToTrack] }";
- -        "{ h   | help     | false           | print help message }"
- -        "{ l   | left     |                 | specify left image }"
- -        "{ r   | right    |                 | specify right image }"
- -        "{ c   | camera   | 0               | specify camera id }"
- -        "{ s   | use_cpu  | false           | use cpu or gpu to process the image }"
- -        "{ v   | video    |                 | use video as input }"
- -        "{ o   | output   | pyrlk_output.jpg| specify output save path when input is images }"
- -        "{ p   | points   | 1000            | specify points count [GoodFeatureToTrack] }"
- -        "{ m   | min_dist | 0               | specify minimal distance between points [GoodFeatureToTrack] }";
++        "{ help h           | false           | print help message }"
++        "{ left l           |                 | specify left image }"
++        "{ right r          |                 | specify right image }"
++        "{ camera c         | 0               | enable camera capturing }"
++        "{ use_cpu s        | false           | use cpu or gpu to process the image }"
++        "{ video v          |                 | use video as input }"
++        "{ output o         | pyrlk_output.jpg| specify output save path when input is images }"
++        "{ points           | 1000            | specify points count [GoodFeatureToTrack] }"
++        "{ min_dist         | 0               | specify minimal distance between points [GoodFeatureToTrack] }";
   
       CommandLineParser cmd(argc, argv, keys);
   
@@@ -112,13 -115,13 +114,13 @@@
       }
   
       bool defaultPicturesFail = false;
- -    string fname0 = cmd.get<string>("l");
- -    string fname1 = cmd.get<string>("r");
- -    string vdofile = cmd.get<string>("v");
- -    string outfile = cmd.get<string>("o");
- -    int points = cmd.get<int>("p");
- -    double minDist = cmd.get<double>("m");
- -    bool useCPU = cmd.get<bool>("s");
+ +    string fname0 = cmd.get<string>("left");
+ +    string fname1 = cmd.get<string>("right");
+ +    string vdofile = cmd.get<string>("video");
++    string outfile = cmd.get<string>("output");
+ +    int points = cmd.get<int>("points");
+ +    double minDist = cmd.get<double>("min_dist");
+ +    bool useCPU = cmd.has("s");
-     bool useCamera = cmd.has("c");
       int inputName = cmd.get<int>("c");
   
       oclMat d_nextPts, d_status;
@@@ -131,23 -134,11 +133,11 @@@
       vector<unsigned char> status(points);
       vector<float> err;
   
-     if (frame0.empty() || frame1.empty())
-     {
-         useCamera = true;
-         defaultPicturesFail = true;
-         VideoCapture capture(inputName);
-         if (!capture.isOpened())
-         {
-             cout << "Can't load input images" << endl;
-             return -1;
-         }
-     }
- 
       cout << "Points count : " << points << endl << endl;
   
-     if (useCamera)
+     if (frame0.empty() || frame1.empty())
       {
- -        CvCapture* capture = 0;
+ +        VideoCapture capture;
           Mat frame, frameCopy;
           Mat frame0Gray, frame1Gray;
           Mat ptr0, ptr1;
diff --cc samples/ocl/squares.cpp

index 40d60fe,48964ff..d31e360
--- 1/samples/ocl/squares.cpp
--- 2/samples/ocl/squares.cpp
+++ b/samples/ocl/squares.cpp
@@@ -2,7 -2,7 +2,8 @@@
   // It loads several images sequentially and tries to find squares in
   // each image
   
--#include "opencv2/core/core.hpp"
++#include "opencv2/core.hpp"
++#include "opencv2/core/utility.hpp"
   #include "opencv2/imgproc/imgproc.hpp"
   #include "opencv2/highgui/highgui.hpp"
   #include "opencv2/ocl/ocl.hpp"
@@@ -48,6 -75,88 +76,88 @@@ static double angle( Point pt1, Point p
   static void findSquares( const Mat& image, vector<vector<Point> >& squares )
   {
       squares.clear();
- -            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
+     Mat pyr, timg, gray0(image.size(), CV_8U), gray;
+ 
+     // down-scale and upscale the image to filter out the noise
+     pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
+     pyrUp(pyr, timg, image.size());
+     vector<vector<Point> > contours;
+ 
+     // find squares in every color plane of the image
+     for( int c = 0; c < 3; c++ )
+     {
+         int ch[] = {c, 0};
+         mixChannels(&timg, 1, &gray0, 1, ch, 1);
+ 
+         // try several threshold levels
+         for( int l = 0; l < N; l++ )
+         {
+             // hack: use Canny instead of zero threshold level.
+             // Canny helps to catch squares with gradient shading
+             if( l == 0 )
+             {
+                 // apply Canny. Take the upper threshold from slider
+                 // and set the lower to 0 (which forces edges merging)
+                 Canny(gray0, gray, 0, thresh, 5);
+                 // dilate canny output to remove potential
+                 // holes between edge segments
+                 dilate(gray, gray, Mat(), Point(-1,-1));
+             }
+             else
+             {
+                 // apply threshold if l!=0:
+                 //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
+                 cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY);
+             }
+ 
+             // find contours and store them all as a list
++            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
+ 
+             vector<Point> approx;
+ 
+             // test each contour
+             for( size_t i = 0; i < contours.size(); i++ )
+             {
+                 // approximate contour with accuracy proportional
+                 // to the contour perimeter
+                 approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
+ 
+                 // square contours should have 4 vertices after approximation
+                 // relatively large area (to filter out noisy contours)
+                 // and be convex.
+                 // Note: absolute value of an area is used because
+                 // area may be positive or negative - in accordance with the
+                 // contour orientation
+                 if( approx.size() == 4 &&
+                         fabs(contourArea(Mat(approx))) > 1000 &&
+                         isContourConvex(Mat(approx)) )
+                 {
+                     double maxCosine = 0;
+ 
+                     for( int j = 2; j < 5; j++ )
+                     {
+                         // find the maximum cosine of the angle between joint edges
+                         double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
+                         maxCosine = MAX(maxCosine, cosine);
+                     }
+ 
+                     // if cosines of all angles are small
+                     // (all angles are ~90 degree) then write quandrange
+                     // vertices to resultant sequence
+                     if( maxCosine < 0.3 )
+                         squares.push_back(approx);
+                 }
+             }
+         }
+     }
+ }
+ 
+ 
+ // returns sequence of squares detected on the image.
+ // the sequence is stored in the specified memory storage
+ static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
+ {
+     squares.clear();
   
       Mat gray;
       cv::ocl::oclMat pyr_ocl, timg_ocl, gray0_ocl, gray_ocl;
@@@ -88,10 -197,9 +198,9 @@@
               }
   
               // find contours and store them all as a list
- -            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
+ +            findContours(gray, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
   
               vector<Point> approx;
- 
               // test each contour
               for( size_t i = 0; i < contours.size(); i++ )
               {
@@@ -137,42 -244,95 +245,95 @@@ static void drawSquares( Mat& image, co
       {
           const Point* p = &squares[i][0];
           int n = (int)squares[i].size();
- -        polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA);
+ +        polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, LINE_AA);
       }
+ }
+ 
   
-     imshow(wndname, image);
+ // draw both pure-C++ and ocl square results onto a single image
+ static Mat drawSquaresBoth( const Mat& image,
+                             const vector<vector<Point> >& sqsCPP,
+                             const vector<vector<Point> >& sqsOCL
+ )
+ {
+     Mat imgToShow(Size(image.cols * 2, image.rows), image.type());
+     Mat lImg = imgToShow(Rect(Point(0, 0), image.size()));
+     Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size()));
+     image.copyTo(lImg);
+     image.copyTo(rImg);
+     drawSquares(lImg, sqsCPP);
+     drawSquares(rImg, sqsOCL);
+     float fontScale = 0.8f;
+     Scalar white = Scalar::all(255), black = Scalar::all(0);
+ 
+     putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
+     putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2);
+     putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
+     putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1);
+ 
+     return imgToShow;
   }
   
   
- int main(int /*argc*/, char** /*argv*/)
+ int main(int argc, char** argv)
   {
- -        cmd.printParams();
+     const char* keys =
+         "{ i | input   |                    | specify input image }"
+         "{ o | output  | squares_output.jpg | specify output save path}";
+     CommandLineParser cmd(argc, argv, keys);
+     string inputName = cmd.get<string>("i");
+     string outfile = cmd.get<string>("o");
+     if(inputName.empty())
+     {
+         cout << "Avaible options:" << endl;
++        cmd.printMessage();
+         return 0;
+     }
   
-     //ocl::setBinpath("F:/kernel_bin");
       vector<ocl::Info> info;
       CV_Assert(ocl::getDevice(info));
- 
-     static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
-         "pic4.png", "pic5.png", "pic6.png", 0 };
-     help();
+     int iterations = 10;
       namedWindow( wndname, 1 );
-     vector<vector<Point> > squares;
+     vector<vector<Point> > squares_cpu, squares_ocl;
   
-     for( int i = 0; names[i] != 0; i++ )
+     Mat image = imread(inputName, 1);
+     if( image.empty() )
       {
-         Mat image = imread(names[i], 1);
-         if( image.empty() )
-         {
-             cout << "Couldn't load " << names[i] << endl;
-             continue;
-         }
+         cout << "Couldn't load " << inputName << endl;
+         return -1;
+     }
+     int j = iterations;
+     int64 t_ocl = 0, t_cpp = 0;
+     //warm-ups
+     cout << "warming up ..." << endl;
+     findSquares(image, squares_cpu);
+     findSquares_ocl(image, squares_ocl);
+ 
+ 
+ #if ACCURACY_CHECK
+     cout << "Checking ocl accuracy ... " << endl;
+     cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
+ #endif
+     do
+     {
+         int64 t_start = cv::getTickCount();
+         findSquares(image, squares_cpu);
+         t_cpp += cv::getTickCount() - t_start;
   
-         findSquares(image, squares);
-         drawSquares(image, squares);
   
-         int c = waitKey();
-         if( (char)c == 27 )
-             break;
+         t_start  = cv::getTickCount();
+         findSquares_ocl(image, squares_ocl);
+         t_ocl += cv::getTickCount() - t_start;
+         cout << "run loop: " << j << endl;
       }
- -    cvWaitKey(0);
+     while(--j);
+     cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
+     cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;
+ 
+     Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
+     imshow(wndname, result);
+     imwrite(outfile, result);
++    waitKey(0);
   
       return 0;
   }
diff --cc samples/ocl/stereo_match.cpp

index 8cc6530,abe75c7..8737a04
--- 1/samples/ocl/stereo_match.cpp
--- 2/samples/ocl/stereo_match.cpp
+++ b/samples/ocl/stereo_match.cpp
@@@ -88,22 -71,30 +73,30 @@@ private
   
   int main(int argc, char** argv)
   {
- -        cmd.printParams();
+     const char* keys =
+         "{ h | help     | false                     | print help message }"
+         "{ l | left     |                           | specify left image }"
+         "{ r | right    |                           | specify right image }"
+         "{ m | method   | BM                        | specify match method(BM/BP/CSBP) }"
+         "{ n | ndisp    | 64                        |  specify number of disparity levels }"
+         "{ s | cpu_ocl  | false                     | use cpu or gpu as ocl device to process the image }"
+         "{ o | output   | stereo_match_output.jpg   | specify output path when input is images}";
+     CommandLineParser cmd(argc, argv, keys);
+     if (cmd.get<bool>("help"))
+     {
+         cout << "Avaible options:" << endl;
++        cmd.printMessage();
+         return 0;
+     }
       try
       {
-         if (argc < 2)
-         {
-             printHelp();
-             return 1;
-         }
- 
-         Params args = Params::read(argc, argv);
-         if (help_showed)
-             return -1;
+         App app(cmd);
+         int flag = CVCL_DEVICE_TYPE_GPU;
+         if(cmd.get<bool>("s") == true)
+             flag = CVCL_DEVICE_TYPE_CPU;
   
-         int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
           vector<Info> info;
- 
-         if(getDevice(info, flags[args.type]) == 0)
+         if(getDevice(info, flag) == 0)
           {
               throw runtime_error("Error: Did not find a valid OpenCL device!");
           }
@@@ -186,13 -138,13 +140,13 @@@ App::App(CommandLineParser& cmd
   void App::run()
   {
       // Load images
-     left_src = imread(p.left);
-     right_src = imread(p.right);
-     if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
-     if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
+     left_src = imread(l_img);
+     right_src = imread(r_img);
+     if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\"");
+     if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\"");
   
- -    cvtColor(left_src, left, CV_BGR2GRAY);
- -    cvtColor(right_src, right, CV_BGR2GRAY);
+ +    cvtColor(left_src, left, COLOR_BGR2GRAY);
+ +    cvtColor(right_src, right, COLOR_BGR2GRAY);
   
       d_left.upload(left);
       d_right.upload(right);
@@@ -417,5 -380,5 +382,3 @@@ void App::handleKey(char key
           break;
       }
   }
--
--
diff --cc samples/ocl/surf_matcher.cpp

index e938a77,bee517f..2961980
--- 1/samples/ocl/surf_matcher.cpp
--- 2/samples/ocl/surf_matcher.cpp
+++ b/samples/ocl/surf_matcher.cpp
@@@ -82,8 -28,9 +29,9 @@@ void workEnd(
   {
       work_end = getTickCount() - work_begin;
   }
- double getTime(){
+ double getTime()
+ {
- -    return work_end /((double)cvGetTickFrequency() * 1000.);
+ +    return work_end /((double)getTickFrequency() * 1000.);
   }
   
   template<class KPDetector>
@@@ -140,8 -87,8 +88,8 @@@ Mat drawGoodMatches
       // drawing the results
       Mat img_matches;
       drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
-         good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
-         std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
+                  good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
- -                 vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
++                 std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS  );
   
       //-- Localize the object
       std::vector<Point2f> obj;
@@@ -155,28 -102,30 +103,30 @@@
       }
       //-- Get the corners from the image_1 ( the object to be "detected" )
       std::vector<Point2f> obj_corners(4);
-     obj_corners[0] = Point(0,0); obj_corners[1] = Point( cpu_img1.cols, 0 );
-     obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = Point( 0, cpu_img1.rows );
- -    obj_corners[0] = cvPoint(0,0);
- -    obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
- -    obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows );
- -    obj_corners[3] = cvPoint( 0, cpu_img1.rows );
++    obj_corners[0] = Point(0,0);
++    obj_corners[1] = Point( cpu_img1.cols, 0 );
++    obj_corners[2] = Point( cpu_img1.cols, cpu_img1.rows );
++    obj_corners[3] = Point( 0, cpu_img1.rows );
       std::vector<Point2f> scene_corners(4);
   
- -    Mat H = findHomography( obj, scene, CV_RANSAC );
+ +    Mat H = findHomography( obj, scene, RANSAC );
       perspectiveTransform( obj_corners, scene_corners, H);
   
       scene_corners_ = scene_corners;
   
       //-- Draw lines between the corners (the mapped object in the scene - image_2 )
       line( img_matches,
-         scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
-         Scalar( 0, 255, 0), 2, LINE_AA );
+           scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
- -          Scalar( 0, 255, 0), 2, CV_AA );
++          Scalar( 0, 255, 0), 2, LINE_AA );
       line( img_matches,
-         scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
-         Scalar( 0, 255, 0), 2, LINE_AA );
+           scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
- -          Scalar( 0, 255, 0), 2, CV_AA );
++          Scalar( 0, 255, 0), 2, LINE_AA );
       line( img_matches,
-         scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
-         Scalar( 0, 255, 0), 2, LINE_AA );
+           scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
- -          Scalar( 0, 255, 0), 2, CV_AA );
++          Scalar( 0, 255, 0), 2, LINE_AA );
       line( img_matches,
-         scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
-         Scalar( 0, 255, 0), 2, LINE_AA );
+           scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
- -          Scalar( 0, 255, 0), 2, CV_AA );
++          Scalar( 0, 255, 0), 2, LINE_AA );
       return img_matches;
   }
   
@@@ -186,7 -135,22 +136,22 @@@
   // use cpu findHomography interface to calculate the transformation matrix
   int main(int argc, char* argv[])
   {
- -        "{ h | help     | false           | print help message  }"
- -        "{ l | left     |                 | specify left image  }"
- -        "{ r | right    |                 | specify right image }"
- -        "{ o | output   | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
- -        "{ c | use_cpu  | false           | use CPU algorithms  }"
- -        "{ a | use_all  | false           | use both CPU and GPU algorithms}";
+     const char* keys =
- -        cmd.printParams();
++        "{ help h    | false           | print help message  }"
++        "{ left l    |                 | specify left image  }"
++        "{ right r   |                 | specify right image }"
++        "{ output o  | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }"
++        "{ use_cpu c | false           | use CPU algorithms  }"
++        "{ use_all a | false           | use both CPU and GPU algorithms}";
+     CommandLineParser cmd(argc, argv, keys);
+     if (cmd.get<bool>("help"))
+     {
+         std::cout << "Avaible options:" << std::endl;
- -    vector<cv::ocl::Info> info;
++        cmd.printMessage();
+         return 0;
+     }
+ 
+ +    std::vector<cv::ocl::Info> info;
       if(cv::ocl::getDevice(info) == 0)
       {
           std::cout << "Error: Did not find a valid OpenCL device!" << std::endl;
@@@ -196,48 -160,32 +161,32 @@@
   
       Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
       oclMat img1, img2;
-     bool useCPU = false;
+     bool useCPU = cmd.get<bool>("c");
       bool useGPU = false;
-     bool useALL = false;
+     bool useALL = cmd.get<bool>("a");
+ 
- -    string outpath = cmd.get<std::string>("o");
++    std::string outpath = cmd.get<std::string>("o");
+ 
+     cpu_img1 = imread(cmd.get<std::string>("l"));
+     CV_Assert(!cpu_img1.empty());
- -    cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
++    cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
+     img1 = cpu_img1_grey;
   
-     for (int i = 1; i < argc; ++i)
+     cpu_img2 = imread(cmd.get<std::string>("r"));
+     CV_Assert(!cpu_img2.empty());
- -    cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
++    cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
+     img2 = cpu_img2_grey;
+ 
+     if(useALL)
       {
-         if (String(argv[i]) == "--left")
-         {
-             cpu_img1 = imread(argv[++i]);
-             CV_Assert(!cpu_img1.empty());
-             cvtColor(cpu_img1, cpu_img1_grey, COLOR_BGR2GRAY);
-             img1 = cpu_img1_grey;
-         }
-         else if (String(argv[i]) == "--right")
-         {
-             cpu_img2 = imread(argv[++i]);
-             CV_Assert(!cpu_img2.empty());
-             cvtColor(cpu_img2, cpu_img2_grey, COLOR_BGR2GRAY);
-             img2 = cpu_img2_grey;
-         }
-         else if (String(argv[i]) == "-c")
-         {
-             useCPU = true;
-             useGPU = false;
-             useALL = false;
-         }else if(String(argv[i]) == "-g")
-         {
-             useGPU = true;
-             useCPU = false;
-             useALL = false;
-         }else if(String(argv[i]) == "-a")
-         {
-             useALL = true;
-             useCPU = false;
-             useGPU = false;
-         }
-         else if (String(argv[i]) == "--help")
-         {
-             help();
-             return -1;
-         }
+         useCPU = false;
+         useGPU = false;
+     }
+     else if(useCPU==false && useALL==false)
+     {
+         useGPU = true;
       }
+ 
       if(!useCPU)
       {
           std::cout
diff --cc samples/ocl/tvl1_optical_flow.cpp

index 0000000,cff9692..2b770e4

mode 000000,100644..100644
--- /dev/null
--- 2/samples/ocl/tvl1_optical_flow.cpp
+++ b/samples/ocl/tvl1_optical_flow.cpp
@@@ -1,0 -1,265 +1,264 @@@
- -        cmd.printParams();
+ #include <iostream>
+ #include <vector>
+ #include <iomanip>
+ 
++#include "opencv2/core/utility.hpp"
+ #include "opencv2/highgui/highgui.hpp"
+ #include "opencv2/ocl/ocl.hpp"
+ #include "opencv2/video/video.hpp"
+ 
+ using namespace std;
+ using namespace cv;
+ using namespace cv::ocl;
+ 
+ typedef unsigned char uchar;
+ #define LOOP_NUM 10
+ int64 work_begin = 0;
+ int64 work_end = 0;
+ 
+ static void workBegin()
+ {
+     work_begin = getTickCount();
+ }
+ static void workEnd()
+ {
+     work_end += (getTickCount() - work_begin);
+ }
+ static double getTime()
+ {
+     return work_end * 1000. / getTickFrequency();
+ }
+ 
+ template <typename T> inline T clamp (T x, T a, T b)
+ {
+     return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
+ }
+ 
+ template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
+ {
+     x = clamp(x, a, b);
+     return c + (d - c) * (x - a) / (b - a);
+ }
+ 
+ static void getFlowField(const Mat& u, const Mat& v, Mat& flowField)
+ {
+     float maxDisplacement = 1.0f;
+ 
+     for (int i = 0; i < u.rows; ++i)
+     {
+         const float* ptr_u = u.ptr<float>(i);
+         const float* ptr_v = v.ptr<float>(i);
+ 
+         for (int j = 0; j < u.cols; ++j)
+         {
+             float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j]));
+ 
+             if (d > maxDisplacement)
+                 maxDisplacement = d;
+         }
+     }
+ 
+     flowField.create(u.size(), CV_8UC4);
+ 
+     for (int i = 0; i < flowField.rows; ++i)
+     {
+         const float* ptr_u = u.ptr<float>(i);
+         const float* ptr_v = v.ptr<float>(i);
+ 
+ 
+         Vec4b* row = flowField.ptr<Vec4b>(i);
+ 
+         for (int j = 0; j < flowField.cols; ++j)
+         {
+             row[j][0] = 0;
+             row[j][1] = static_cast<unsigned char> (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
+             row[j][2] = static_cast<unsigned char> (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f));
+             row[j][3] = 255;
+         }
+     }
+ }
+ 
+ 
+ int main(int argc, const char* argv[])
+ {
+     static std::vector<Info> ocl_info;
+     ocl::getDevice(ocl_info);
+     //if you want to use undefault device, set it here
+     setDevice(ocl_info[0]);
+ 
+     //set this to save kernel compile time from second time you run
+     ocl::setBinpath("./");
+     const char* keys =
+         "{ h   | help       | false           | print help message }"
+         "{ l   | left       |                 | specify left image }"
+         "{ r   | right      |                 | specify right image }"
+         "{ o   | output     | tvl1_output.jpg | specify output save path }"
+         "{ c   | camera     | 0               | enable camera capturing }"
+         "{ s   | use_cpu    | false           | use cpu or gpu to process the image }"
+         "{ v   | video      |                 | use video as input }";
+ 
+     CommandLineParser cmd(argc, argv, keys);
+ 
+     if (cmd.get<bool>("help"))
+     {
+         cout << "Usage: pyrlk_optical_flow [options]" << endl;
+         cout << "Avaible options:" << endl;
- -        CvCapture* capture = 0;
- -        capture = cvCaptureFromCAM( inputName );
- -        if (!capture)
++        cmd.printMessage();
+         return 0;
+     }
+ 
+     bool defaultPicturesFail = false;
+     string fname0 = cmd.get<string>("l");
+     string fname1 = cmd.get<string>("r");
+     string vdofile = cmd.get<string>("v");
+     string outpath = cmd.get<string>("o");
+     bool useCPU = cmd.get<bool>("s");
+     bool useCamera = cmd.get<bool>("c");
+     int inputName = cmd.get<int>("c");
+ 
+     Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+     Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+     cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+     cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+ 
+ 
+     Mat flow, show_flow;
+     Mat flow_vec[2];
+     if (frame0.empty() || frame1.empty())
+     {
+         useCamera = true;
+         defaultPicturesFail = true;
- -        CvCapture* capture = 0;
++        VideoCapture capture( inputName );
++        if (!capture.isOpened())
+         {
+             cout << "Can't load input images" << endl;
+             return -1;
+         }
+     }
+ 
+ 
+     if (useCamera)
+     {
- -            capture = cvCaptureFromCAM( inputName );
++        VideoCapture capture;
+         Mat frame, frameCopy;
+         Mat frame0Gray, frame1Gray;
+         Mat ptr0, ptr1;
+ 
+         if(vdofile == "")
- -            capture = cvCreateFileCapture(vdofile.c_str());
++            capture.open( inputName );
+         else
- -        if(!capture)
++            capture.open(vdofile.c_str());
+ 
+         int c = inputName ;
- -            frame = cvQueryFrame( capture );
- -            if( frame.empty() )
++        if(!capture.isOpened())
+         {
+             if(vdofile == "")
+                 cout << "Capture from CAM " << c << " didn't work" << endl;
+             else
+                 cout << "Capture from file " << vdofile << " failed" <<endl;
+             if (defaultPicturesFail)
+             {
+                 return -1;
+             }
+             goto nocamera;
+         }
+ 
+         cout << "In capture ..." << endl;
+         for(int i = 0;; i++)
+         {
- -        cvReleaseCapture( &capture );
++            if( !capture.read(frame) )
+                 break;
+ 
+             if (i == 0)
+             {
+                 frame.copyTo( frame0 );
+                 cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+             }
+             else
+             {
+                 if (i%2 == 1)
+                 {
+                     frame.copyTo(frame1);
+                     cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                     ptr0 = frame0Gray;
+                     ptr1 = frame1Gray;
+                 }
+                 else
+                 {
+                     frame.copyTo(frame0);
+                     cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                     ptr0 = frame1Gray;
+                     ptr1 = frame0Gray;
+                 }
+ 
+                 if (useCPU)
+                 {
+                     alg->calc(ptr0, ptr1, flow);
+                     split(flow, flow_vec);
+                 }
+                 else
+                 {
+                     oclMat d_flowx, d_flowy;
+                     d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy);
+                     d_flowx.download(flow_vec[0]);
+                     d_flowy.download(flow_vec[1]);
+                 }
+                 if (i%2 == 1)
+                     frame1.copyTo(frameCopy);
+                 else
+                     frame0.copyTo(frameCopy);
+                 getFlowField(flow_vec[0], flow_vec[1], show_flow);
+                 imshow("PyrLK [Sparse]", show_flow);
+             }
+ 
+             if( waitKey( 10 ) >= 0 )
+                 goto _cleanup_;
+         }
+ 
+         waitKey(0);
+ 
+ _cleanup_:
++        capture.release();
+     }
+     else
+     {
+ nocamera:
+         oclMat d_flowx, d_flowy;
+         for(int i = 0; i <= LOOP_NUM; i ++)
+         {
+             cout << "loop" << i << endl;
+ 
+             if (i > 0) workBegin();
+             if (useCPU)
+             {
+                 alg->calc(frame0, frame1, flow);
+                 split(flow, flow_vec);
+             }
+             else
+             {
+                 d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+                 d_flowx.download(flow_vec[0]);
+                 d_flowy.download(flow_vec[1]);
+             }
+             if (i > 0 && i <= LOOP_NUM)
+                 workEnd();
+ 
+             if (i == LOOP_NUM)
+             {
+                 if (useCPU)
+                     cout << "average CPU time (noCamera) : ";
+                 else
+                     cout << "average GPU time (noCamera) : ";
+                 cout << getTime() / LOOP_NUM << " ms" << endl;
+ 
+                 getFlowField(flow_vec[0], flow_vec[1], show_flow);
+                 imshow("PyrLK [Sparse]", show_flow);
+                 imwrite(outpath, show_flow);
+             }
+         }
+     }
+ 
+     waitKey();
+ 
+     return 0;
+ }
author	Roman Donchenko <roman.donchenko@itseez.com>
	Tue, 25 Jun 2013 10:08:23 +0000 (14:08 +0400)
committer	Roman Donchenko <roman.donchenko@itseez.com>
	Tue, 25 Jun 2013 11:55:52 +0000 (15:55 +0400)
		1	2
modules/calib3d/include/opencv2/calib3d.hpp	patch \|	diff1 \|	\|	blob \| history
modules/core/include/opencv2/core/cuda/limits.hpp	patch \|	diff1 \|	\|	blob \| history
modules/core/include/opencv2/core/mat.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/include/opencv2/core/private.hpp	patch \|	diff1 \|	\|	blob \| history
modules/core/src/matrix.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/gpulegacy/include/opencv2/gpulegacy/NCV.hpp	patch \|	diff1 \|	\|	blob \| history
modules/gpulegacy/src/cuda/NCVPixelOperations.hpp	patch \|	diff1 \|	\|	blob \| history
modules/highgui/include/opencv2/highgui/cap_ios.h	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/src/window_QT.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java	patch \|	diff1 \|	diff2 \|	blob \| history
modules/nonfree/test/test_features2d.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/include/opencv2/ocl.hpp	patch \|	diff1 \|	\|	blob \| history
modules/ocl/include/opencv2/ocl/private/util.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/perf/main.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/perf/perf_calib3d.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/perf/perf_imgproc.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/perf/precomp.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/hog.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/matrix_operations.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/mcwutil.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/opencl/objdetect_hog.cl	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_imgproc.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/test_objdetect.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/utility.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/test/utility.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/src/ts_func.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/facedetect.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/hog.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/pyrlk_optical_flow.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/squares.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/stereo_match.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/surf_matcher.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/ocl/tvl1_optical_flow.cpp	patch \|	\|	diff2 \|	blob \| history