Merge remote-tracking branch 'origin/2.4' into merge-2.4

author Roman Donchenko <roman.donchenko@itseez.com>

Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)

committer Roman Donchenko <roman.donchenko@itseez.com>

Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)
author Roman Donchenko <roman.donchenko@itseez.com>
Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)
committer Roman Donchenko <roman.donchenko@itseez.com>
Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)
diff --cc CMakeLists.txt

index 4fff2d6,3f793f1..2fb1cf7
--- 1/CMakeLists.txt
--- 2/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -155,9 -163,8 +155,10 @@@ OCV_OPTION(WITH_CLP            "Includ
   OCV_OPTION(WITH_OPENCL         "Include OpenCL Runtime support"              ON   IF (NOT IOS) )
   OCV_OPTION(WITH_OPENCLAMDFFT   "Include AMD OpenCL FFT library support"      ON   IF (NOT ANDROID AND NOT IOS) )
   OCV_OPTION(WITH_OPENCLAMDBLAS  "Include AMD OpenCL BLAS library support"     ON   IF (NOT ANDROID AND NOT IOS) )
+ +OCV_OPTION(WITH_DIRECTX        "Include DirectX support"                     ON   IF WIN32 )
+ OCV_OPTION(WITH_INTELPERC      "Include Intel Perceptual Computing support"  OFF  IF WIN32 )
   
+ +
   # OpenCV build components
   # ===================================================
   OCV_OPTION(BUILD_SHARED_LIBS        "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" NOT (ANDROID OR IOS) )
diff --cc cmake/OpenCVCompilerOptions.cmake
Simple merge
diff --cc cmake/OpenCVFindLibsVideo.cmake
Simple merge
diff --cc cmake/templates/cvconfig.h.in

index 554b91c,a6cee63..e1beaad
--- 1/cmake/templates/cvconfig.h.in
--- 2/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@@ -85,9 -76,18 +85,12 @@@
   /* GTK+ 2.x toolkit */
   #cmakedefine HAVE_GTK
   
- -/* Apple ImageIO Framework */
- -#cmakedefine HAVE_IMAGEIO
+ +/* Define to 1 if you have the <inttypes.h> header file. */
+ +#cmakedefine HAVE_INTTYPES_H 1
   
+ /* Intel Perceptual Computing SDK library */
+ #cmakedefine HAVE_INTELPERC
+ 
   /* Intel Integrated Performance Primitives */
   #cmakedefine HAVE_IPP
   
diff --cc modules/core/doc/operations_on_arrays.rst
Simple merge
diff --cc modules/core/src/dxt.cpp
Simple merge
diff --cc modules/core/src/matrix.cpp
Simple merge
diff --cc modules/highgui/CMakeLists.txt
Simple merge
diff --cc modules/highgui/include/opencv2/highgui.hpp

index cebf8fe,0000000..eb4ee8c

mode 100644,000000..100644
--- 1/modules/highgui/include/opencv2/highgui.hpp
--- /dev/null
+++ b/modules/highgui/include/opencv2/highgui.hpp
@@@ -1,553 -1,0 +1,574 @@@
-        CAP_MSMF         = 1400   // Microsoft Media Foundation (via videoInput)
+ +/*M///////////////////////////////////////////////////////////////////////////////////////
+ +//
+ +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ +//
+ +//  By downloading, copying, installing or using the software you agree to this license.
+ +//  If you do not agree to this license, do not download, install,
+ +//  copy or use the software.
+ +//
+ +//
+ +//                          License Agreement
+ +//                For Open Source Computer Vision Library
+ +//
+ +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +//
+ +// Redistribution and use in source and binary forms, with or without modification,
+ +// are permitted provided that the following conditions are met:
+ +//
+ +//   * Redistribution's of source code must retain the above copyright notice,
+ +//     this list of conditions and the following disclaimer.
+ +//
+ +//   * Redistribution's in binary form must reproduce the above copyright notice,
+ +//     this list of conditions and the following disclaimer in the documentation
+ +//     and/or other materials provided with the distribution.
+ +//
+ +//   * The name of the copyright holders may not be used to endorse or promote products
+ +//     derived from this software without specific prior written permission.
+ +//
+ +// This software is provided by the copyright holders and contributors "as is" and
+ +// any express or implied warranties, including, but not limited to, the implied
+ +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+ +// In no event shall the Intel Corporation or contributors be liable for any direct,
+ +// indirect, incidental, special, exemplary, or consequential damages
+ +// (including, but not limited to, procurement of substitute goods or services;
+ +// loss of use, data, or profits; or business interruption) however caused
+ +// and on any theory of liability, whether in contract, strict liability,
+ +// or tort (including negligence or otherwise) arising in any way out of
+ +// the use of this software, even if advised of the possibility of such damage.
+ +//
+ +//M*/
+ +
+ +#ifndef __OPENCV_HIGHGUI_HPP__
+ +#define __OPENCV_HIGHGUI_HPP__
+ +
+ +#include "opencv2/core.hpp"
+ +
+ +
+ +///////////////////////// graphical user interface //////////////////////////
+ +namespace cv
+ +{
+ +
+ +// Flags for namedWindow
+ +enum { WINDOW_NORMAL     = 0x00000000, // the user can resize the window (no constraint); also used to switch a fullscreen window back to normal size
+ +       WINDOW_AUTOSIZE   = 0x00000001, // the user cannot resize the window; its size is constrained by the displayed image
+ +       WINDOW_OPENGL     = 0x00001000, // window with OpenGL support
+ +
+ +       WINDOW_FULLSCREEN = 1,          // change the window to fullscreen (same numeric value as WINDOW_AUTOSIZE)
+ +       WINDOW_FREERATIO  = 0x00000100, // the image expands as much as it can (no ratio constraint)
+ +       WINDOW_KEEPRATIO  = 0x00000000  // the ratio of the image is respected (same numeric value as WINDOW_NORMAL)
+ +     };
+ +
+ +// Flags for set / getWindowProperty
+ +enum { WND_PROP_FULLSCREEN   = 0, // fullscreen property    (can be WINDOW_NORMAL or WINDOW_FULLSCREEN)
+ +       WND_PROP_AUTOSIZE     = 1, // autosize property      (can be WINDOW_NORMAL or WINDOW_AUTOSIZE)
+ +       WND_PROP_ASPECT_RATIO = 2, // window's aspect ratio  (can be set to WINDOW_FREERATIO or WINDOW_KEEPRATIO)
+ +       WND_PROP_OPENGL       = 3  // OpenGL support
+ +     };
+ +
+ +enum { EVENT_MOUSEMOVE      = 0,
+ +       EVENT_LBUTTONDOWN    = 1,
+ +       EVENT_RBUTTONDOWN    = 2,
+ +       EVENT_MBUTTONDOWN    = 3,
+ +       EVENT_LBUTTONUP      = 4,
+ +       EVENT_RBUTTONUP      = 5,
+ +       EVENT_MBUTTONUP      = 6,
+ +       EVENT_LBUTTONDBLCLK  = 7,
+ +       EVENT_RBUTTONDBLCLK  = 8,
+ +       EVENT_MBUTTONDBLCLK  = 9
+ +     };
+ +
+ +enum { EVENT_FLAG_LBUTTON   = 1,
+ +       EVENT_FLAG_RBUTTON   = 2,
+ +       EVENT_FLAG_MBUTTON   = 4,
+ +       EVENT_FLAG_CTRLKEY   = 8,
+ +       EVENT_FLAG_SHIFTKEY  = 16,
+ +       EVENT_FLAG_ALTKEY    = 32
+ +     };
+ +
+ +// Qt font
+ +enum {  QT_FONT_LIGHT           = 25, //QFont::Light,
+ +        QT_FONT_NORMAL          = 50, //QFont::Normal,
+ +        QT_FONT_DEMIBOLD        = 63, //QFont::DemiBold,
+ +        QT_FONT_BOLD            = 75, //QFont::Bold,
+ +        QT_FONT_BLACK           = 87  //QFont::Black
+ +     };
+ +
+ +// Qt font style
+ +enum {  QT_STYLE_NORMAL         = 0, //QFont::StyleNormal,
+ +        QT_STYLE_ITALIC         = 1, //QFont::StyleItalic,
+ +        QT_STYLE_OBLIQUE        = 2  //QFont::StyleOblique
+ +     };
+ +
+ +// Qt "button" type
+ +enum { QT_PUSH_BUTTON = 0,
+ +       QT_CHECKBOX    = 1,
+ +       QT_RADIOBOX    = 2
+ +     };
+ +
+ +
+ +typedef void (*MouseCallback)(int event, int x, int y, int flags, void* userdata);
+ +typedef void (*TrackbarCallback)(int pos, void* userdata);
+ +typedef void (*OpenGlDrawCallback)(void* userdata);
+ +typedef void (*ButtonCallback)(int state, void* userdata);
+ +
+ +
+ +CV_EXPORTS_W void namedWindow(const String& winname, int flags = WINDOW_AUTOSIZE);
+ +
+ +CV_EXPORTS_W void destroyWindow(const String& winname);
+ +
+ +CV_EXPORTS_W void destroyAllWindows();
+ +
+ +CV_EXPORTS_W int startWindowThread();
+ +
+ +CV_EXPORTS_W int waitKey(int delay = 0);
+ +
+ +CV_EXPORTS_W void imshow(const String& winname, InputArray mat);
+ +
+ +CV_EXPORTS_W void resizeWindow(const String& winname, int width, int height);
+ +
+ +CV_EXPORTS_W void moveWindow(const String& winname, int x, int y);
+ +
+ +CV_EXPORTS_W void setWindowProperty(const String& winname, int prop_id, double prop_value);
+ +
+ +CV_EXPORTS_W double getWindowProperty(const String& winname, int prop_id);
+ +
+ +//! assigns callback for mouse events
+ +CV_EXPORTS void setMouseCallback(const String& winname, MouseCallback onMouse, void* userdata = 0);
+ +
+ +CV_EXPORTS int createTrackbar(const String& trackbarname, const String& winname,
+ +                              int* value, int count,
+ +                              TrackbarCallback onChange = 0,
+ +                              void* userdata = 0);
+ +
+ +CV_EXPORTS_W int getTrackbarPos(const String& trackbarname, const String& winname);
+ +
+ +CV_EXPORTS_W void setTrackbarPos(const String& trackbarname, const String& winname, int pos);
+ +
+ +
+ +// OpenGL support
+ +CV_EXPORTS void imshow(const String& winname, const ogl::Texture2D& tex);
+ +
+ +CV_EXPORTS void setOpenGlDrawCallback(const String& winname, OpenGlDrawCallback onOpenGlDraw, void* userdata = 0);
+ +
+ +CV_EXPORTS void setOpenGlContext(const String& winname);
+ +
+ +CV_EXPORTS void updateWindow(const String& winname);
+ +
+ +
+ +// Only for Qt
+ +
+ +struct QtFont
+ +{
+ +    const char* nameFont;  // Qt: nameFont
+ +    Scalar      color;     // Qt: ColorFont -> cvScalar(blue_component, green_component, red_component[, alpha_component])
+ +    int         font_face; // Qt: bool italic
+ +    const int*  ascii;     // font data and metrics
+ +    const int*  greek;
+ +    const int*  cyrillic;
+ +    float       hscale, vscale;
+ +    float       shear;     // slope coefficient: 0 - normal, >0 - italic
+ +    int         thickness; // Qt: weight
+ +    float       dx;        // horizontal interval between letters
+ +    int         line_type; // Qt: PointSize
+ +};
+ +
+ +CV_EXPORTS QtFont fontQt(const String& nameFont, int pointSize = -1,
+ +                         Scalar color = Scalar::all(0), int weight = QT_FONT_NORMAL,
+ +                         int style = QT_STYLE_NORMAL, int spacing = 0);
+ +
+ +CV_EXPORTS void addText( const Mat& img, const String& text, Point org, const QtFont& font);
+ +
+ +CV_EXPORTS void displayOverlay(const String& winname, const String& text, int delayms = 0);
+ +
+ +CV_EXPORTS void displayStatusBar(const String& winname, const String& text, int delayms = 0);
+ +
+ +CV_EXPORTS void saveWindowParameters(const String& windowName);
+ +
+ +CV_EXPORTS void loadWindowParameters(const String& windowName);
+ +
+ +CV_EXPORTS  int startLoop(int (*pt2Func)(int argc, char *argv[]), int argc, char* argv[]);
+ +
+ +CV_EXPORTS  void stopLoop();
+ +
+ +CV_EXPORTS int createButton( const String& bar_name, ButtonCallback on_change,
+ +                             void* userdata = 0, int type = QT_PUSH_BUTTON,
+ +                             bool initial_button_state = false);
+ +
+ +} // cv
+ +
+ +
+ +
+ +//////////////////////////////// image codec ////////////////////////////////
+ +namespace cv
+ +{
+ +
+ +enum { IMREAD_UNCHANGED  = -1, // 8bit, color or not
+ +       IMREAD_GRAYSCALE  = 0,  // 8bit, gray
+ +       IMREAD_COLOR      = 1,  // ?, color
+ +       IMREAD_ANYDEPTH   = 2,  // any depth, ?
+ +       IMREAD_ANYCOLOR   = 4   // ?, any color
+ +     };
+ +
+ +enum { IMWRITE_JPEG_QUALITY    = 1,
+ +       IMWRITE_PNG_COMPRESSION = 16,
+ +       IMWRITE_PNG_STRATEGY    = 17,
+ +       IMWRITE_PNG_BILEVEL     = 18,
+ +       IMWRITE_PXM_BINARY      = 32,
+ +       IMWRITE_WEBP_QUALITY    = 64
+ +     };
+ +
+ +enum { IMWRITE_PNG_STRATEGY_DEFAULT      = 0,
+ +       IMWRITE_PNG_STRATEGY_FILTERED     = 1,
+ +       IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY = 2,
+ +       IMWRITE_PNG_STRATEGY_RLE          = 3,
+ +       IMWRITE_PNG_STRATEGY_FIXED        = 4
+ +     };
+ +
+ +CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR );
+ +
+ +CV_EXPORTS_W bool imwrite( const String& filename, InputArray img,
+ +              const std::vector<int>& params = std::vector<int>());
+ +
+ +CV_EXPORTS_W Mat imdecode( InputArray buf, int flags );
+ +
+ +CV_EXPORTS Mat imdecode( InputArray buf, int flags, Mat* dst);
+ +
+ +CV_EXPORTS_W bool imencode( const String& ext, InputArray img,
+ +                            CV_OUT std::vector<uchar>& buf,
+ +                            const std::vector<int>& params = std::vector<int>());
+ +
+ +} // cv
+ +
+ +
+ +
+ +////////////////////////////////// video io /////////////////////////////////
+ +
+ +typedef struct CvCapture CvCapture;
+ +typedef struct CvVideoWriter CvVideoWriter;
+ +
+ +namespace cv
+ +{
+ +
+ +// Camera API
+ +enum { CAP_ANY          = 0,     // autodetect
+ +       CAP_VFW          = 200,   // platform native
+ +       CAP_V4L          = 200,
+ +       CAP_V4L2         = CAP_V4L,
+ +       CAP_FIREWARE     = 300,   // IEEE 1394 drivers
+ +       CAP_FIREWIRE     = CAP_FIREWARE,
+ +       CAP_IEEE1394     = CAP_FIREWARE,
+ +       CAP_DC1394       = CAP_FIREWARE,
+ +       CAP_CMU1394      = CAP_FIREWARE,
+ +       CAP_QT           = 500,   // QuickTime
+ +       CAP_UNICAP       = 600,   // Unicap drivers
+ +       CAP_DSHOW        = 700,   // DirectShow (via videoInput)
+ +       CAP_PVAPI        = 800,   // PvAPI, Prosilica GigE SDK
+ +       CAP_OPENNI       = 900,   // OpenNI (for Kinect)
+ +       CAP_OPENNI_ASUS  = 910,   // OpenNI (for Asus Xtion)
+ +       CAP_ANDROID      = 1000,  // Android
+ +       CAP_XIAPI        = 1100,  // XIMEA Camera API
+ +       CAP_AVFOUNDATION = 1200,  // AVFoundation framework for iOS (OS X Lion will have the same API)
+ +       CAP_GIGANETIX    = 1300,  // Smartek Giganetix GigEVisionSDK
++       CAP_MSMF         = 1400,  // Microsoft Media Foundation (via videoInput)
++       CAP_INTELPERC    = 1500   // Intel Perceptual Computing SDK
+ +     };
+ +
+ +// generic properties (based on DC1394 properties)
+ +enum { CAP_PROP_POS_MSEC       =0,
+ +       CAP_PROP_POS_FRAMES     =1,
+ +       CAP_PROP_POS_AVI_RATIO  =2,
+ +       CAP_PROP_FRAME_WIDTH    =3,
+ +       CAP_PROP_FRAME_HEIGHT   =4,
+ +       CAP_PROP_FPS            =5,
+ +       CAP_PROP_FOURCC         =6,
+ +       CAP_PROP_FRAME_COUNT    =7,
+ +       CAP_PROP_FORMAT         =8,
+ +       CAP_PROP_MODE           =9,
+ +       CAP_PROP_BRIGHTNESS    =10,
+ +       CAP_PROP_CONTRAST      =11,
+ +       CAP_PROP_SATURATION    =12,
+ +       CAP_PROP_HUE           =13,
+ +       CAP_PROP_GAIN          =14,
+ +       CAP_PROP_EXPOSURE      =15,
+ +       CAP_PROP_CONVERT_RGB   =16,
+ +       CAP_PROP_WHITE_BALANCE_BLUE_U =17,
+ +       CAP_PROP_RECTIFICATION =18,
+ +       CAP_PROP_MONOCROME     =19,
+ +       CAP_PROP_SHARPNESS     =20,
+ +       CAP_PROP_AUTO_EXPOSURE =21, // DC1394: exposure control done by camera, user can adjust reference level using this feature
+ +       CAP_PROP_GAMMA         =22,
+ +       CAP_PROP_TEMPERATURE   =23,
+ +       CAP_PROP_TRIGGER       =24,
+ +       CAP_PROP_TRIGGER_DELAY =25,
+ +       CAP_PROP_WHITE_BALANCE_RED_V =26,
+ +       CAP_PROP_ZOOM          =27,
+ +       CAP_PROP_FOCUS         =28,
+ +       CAP_PROP_GUID          =29,
+ +       CAP_PROP_ISO_SPEED     =30,
+ +       CAP_PROP_BACKLIGHT     =32, // note: 31 is not used in this enum; CAP_PROP_DC1394_MAX (DC1394 enum) has the value 31
+ +       CAP_PROP_PAN           =33,
+ +       CAP_PROP_TILT          =34,
+ +       CAP_PROP_ROLL          =35,
+ +       CAP_PROP_IRIS          =36,
+ +       CAP_PROP_SETTINGS      =37
+ +     };
+ +
+ +
+ +// DC1394 only
+ +// modes of the controlling registers (can be: auto, manual, auto single push, absolute; the latter is allowed with any other mode)
+ +// every feature can have only one mode turned on at a time
+ +enum { CAP_PROP_DC1394_OFF                = -4, //turn the feature off (not controlled manually nor automatically)
+ +       CAP_PROP_DC1394_MODE_MANUAL        = -3, //set automatically when a value of the feature is set by the user
+ +       CAP_PROP_DC1394_MODE_AUTO          = -2,
+ +       CAP_PROP_DC1394_MODE_ONE_PUSH_AUTO = -1,
+ +       CAP_PROP_DC1394_MAX                = 31
+ +     };
+ +
+ +
+ +// OpenNI map generators
+ +enum { CAP_OPENNI_DEPTH_GENERATOR = 1 << 31,
+ +       CAP_OPENNI_IMAGE_GENERATOR = 1 << 30,
+ +       CAP_OPENNI_GENERATORS_MASK = CAP_OPENNI_DEPTH_GENERATOR + CAP_OPENNI_IMAGE_GENERATOR
+ +     };
+ +
+ +// Properties of cameras available through OpenNI interfaces
+ +enum { CAP_PROP_OPENNI_OUTPUT_MODE       = 100,
+ +       CAP_PROP_OPENNI_FRAME_MAX_DEPTH   = 101, // in mm
+ +       CAP_PROP_OPENNI_BASELINE          = 102, // in mm
+ +       CAP_PROP_OPENNI_FOCAL_LENGTH      = 103, // in pixels
+ +       CAP_PROP_OPENNI_REGISTRATION      = 104, // flag that synchronizes the remapping depth map to image map
+ +                                                // by changing depth generator's view point (if the flag is "on") or
+ +                                                // sets this view point to its normal one (if the flag is "off").
+ +       CAP_PROP_OPENNI_REGISTRATION_ON   = CAP_PROP_OPENNI_REGISTRATION,
+ +       CAP_PROP_OPENNI_APPROX_FRAME_SYNC = 105,
+ +       CAP_PROP_OPENNI_MAX_BUFFER_SIZE   = 106,
+ +       CAP_PROP_OPENNI_CIRCLE_BUFFER     = 107,
+ +       CAP_PROP_OPENNI_MAX_TIME_DURATION = 108,
+ +       CAP_PROP_OPENNI_GENERATOR_PRESENT = 109
+ +     };
+ +
+ +// OpenNI shortcuts
+ +enum { CAP_OPENNI_IMAGE_GENERATOR_PRESENT         = CAP_OPENNI_IMAGE_GENERATOR + CAP_PROP_OPENNI_GENERATOR_PRESENT,
+ +       CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE     = CAP_OPENNI_IMAGE_GENERATOR + CAP_PROP_OPENNI_OUTPUT_MODE,
+ +       CAP_OPENNI_DEPTH_GENERATOR_BASELINE        = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_BASELINE,
+ +       CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH    = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_FOCAL_LENGTH,
+ +       CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION    = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_REGISTRATION,
+ +       CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION_ON = CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION
+ +     };
+ +
+ +// OpenNI data given from depth generator
+ +enum { CAP_OPENNI_DEPTH_MAP         = 0, // Depth values in mm (CV_16UC1)
+ +       CAP_OPENNI_POINT_CLOUD_MAP   = 1, // XYZ in meters (CV_32FC3)
+ +       CAP_OPENNI_DISPARITY_MAP     = 2, // Disparity in pixels (CV_8UC1)
+ +       CAP_OPENNI_DISPARITY_MAP_32F = 3, // Disparity in pixels (CV_32FC1)
+ +       CAP_OPENNI_VALID_DEPTH_MASK  = 4, // CV_8UC1
+ +
+ +       // Data given from RGB image generator
+ +       CAP_OPENNI_BGR_IMAGE         = 5,
+ +       CAP_OPENNI_GRAY_IMAGE        = 6
+ +     };
+ +
+ +// Supported output modes of OpenNI image generator
+ +enum { CAP_OPENNI_VGA_30HZ  = 0,
+ +       CAP_OPENNI_SXGA_15HZ = 1,
+ +       CAP_OPENNI_SXGA_30HZ = 2,
+ +       CAP_OPENNI_QVGA_30HZ = 3,
+ +       CAP_OPENNI_QVGA_60HZ = 4
+ +     };
+ +
+ +
+ +// GStreamer
+ +enum { CAP_PROP_GSTREAMER_QUEUE_LENGTH = 200 // default is 1
+ +     };
+ +
+ +
+ +// PVAPI
+ +enum { CAP_PROP_PVAPI_MULTICASTIP = 300 // IP address used to enable multicast master mode; 0 disables multicast
+ +     };
+ +
+ +
+ +// Properties of cameras available through XIMEA SDK interface
+ +enum { CAP_PROP_XI_DOWNSAMPLING  = 400, // Change image resolution by binning or skipping.
+ +       CAP_PROP_XI_DATA_FORMAT   = 401, // Output data format.
+ +       CAP_PROP_XI_OFFSET_X      = 402, // Horizontal offset from the origin to the area of interest (in pixels).
+ +       CAP_PROP_XI_OFFSET_Y      = 403, // Vertical offset from the origin to the area of interest (in pixels).
+ +       CAP_PROP_XI_TRG_SOURCE    = 404, // Defines source of trigger.
+ +       CAP_PROP_XI_TRG_SOFTWARE  = 405, // Generates an internal trigger. PRM_TRG_SOURCE must be set to TRG_SOFTWARE.
+ +       CAP_PROP_XI_GPI_SELECTOR  = 406, // Selects general purpose input
+ +       CAP_PROP_XI_GPI_MODE      = 407, // Set general purpose input mode
+ +       CAP_PROP_XI_GPI_LEVEL     = 408, // Get general purpose level
+ +       CAP_PROP_XI_GPO_SELECTOR  = 409, // Selects general purpose output
+ +       CAP_PROP_XI_GPO_MODE      = 410, // Set general purpose output mode
+ +       CAP_PROP_XI_LED_SELECTOR  = 411, // Selects camera signalling LED
+ +       CAP_PROP_XI_LED_MODE      = 412, // Define camera signalling LED functionality
+ +       CAP_PROP_XI_MANUAL_WB     = 413, // Calculates White Balance(must be called during acquisition)
+ +       CAP_PROP_XI_AUTO_WB       = 414, // Automatic white balance
+ +       CAP_PROP_XI_AEAG          = 415, // Automatic exposure/gain
+ +       CAP_PROP_XI_EXP_PRIORITY  = 416, // Exposure priority (0.5 - exposure 50%, gain 50%).
+ +       CAP_PROP_XI_AE_MAX_LIMIT  = 417, // Maximum limit of exposure in AEAG procedure
+ +       CAP_PROP_XI_AG_MAX_LIMIT  = 418, // Maximum limit of gain in AEAG procedure
+ +       CAP_PROP_XI_AEAG_LEVEL    = 419, // Average intensity of output signal AEAG should achieve(in %)
+ +       CAP_PROP_XI_TIMEOUT       = 420  // Image capture timeout in milliseconds
+ +     };
+ +
+ +
+ +// Properties for Android cameras
+ +enum { CAP_PROP_ANDROID_AUTOGRAB               = 1024,
+ +       CAP_PROP_ANDROID_PREVIEW_SIZES_STRING   = 1025, // readonly, tricky property, returns const char* indeed
+ +       CAP_PROP_ANDROID_PREVIEW_FORMAT         = 1026, // readonly, tricky property, returns const char* indeed
+ +       CAP_PROP_ANDROID_FLASH_MODE             = 8001,
+ +       CAP_PROP_ANDROID_FOCUS_MODE             = 8002,
+ +       CAP_PROP_ANDROID_WHITE_BALANCE          = 8003,
+ +       CAP_PROP_ANDROID_ANTIBANDING            = 8004,
+ +       CAP_PROP_ANDROID_FOCAL_LENGTH           = 8005,
+ +       CAP_PROP_ANDROID_FOCUS_DISTANCE_NEAR    = 8006,
+ +       CAP_PROP_ANDROID_FOCUS_DISTANCE_OPTIMAL = 8007,
+ +       CAP_PROP_ANDROID_FOCUS_DISTANCE_FAR     = 8008
+ +     };
+ +
+ +
+ +// Android camera output formats
+ +enum { CAP_ANDROID_COLOR_FRAME_BGR  = 0, //BGR
+ +       CAP_ANDROID_COLOR_FRAME      = CAP_ANDROID_COLOR_FRAME_BGR,
+ +       CAP_ANDROID_GREY_FRAME       = 1,  //Y
+ +       CAP_ANDROID_COLOR_FRAME_RGB  = 2,
+ +       CAP_ANDROID_COLOR_FRAME_BGRA = 3,
+ +       CAP_ANDROID_COLOR_FRAME_RGBA = 4
+ +     };
+ +
+ +
+ +// Android camera flash modes
+ +enum { CAP_ANDROID_FLASH_MODE_AUTO     = 0,
+ +       CAP_ANDROID_FLASH_MODE_OFF      = 1,
+ +       CAP_ANDROID_FLASH_MODE_ON       = 2,
+ +       CAP_ANDROID_FLASH_MODE_RED_EYE  = 3,
+ +       CAP_ANDROID_FLASH_MODE_TORCH    = 4
+ +     };
+ +
+ +
+ +// Android camera focus modes
+ +enum { CAP_ANDROID_FOCUS_MODE_AUTO             = 0,
+ +       CAP_ANDROID_FOCUS_MODE_CONTINUOUS_VIDEO = 1,
+ +       CAP_ANDROID_FOCUS_MODE_EDOF             = 2,
+ +       CAP_ANDROID_FOCUS_MODE_FIXED            = 3,
+ +       CAP_ANDROID_FOCUS_MODE_INFINITY         = 4,
+ +       CAP_ANDROID_FOCUS_MODE_MACRO            = 5
+ +     };
+ +
+ +
+ +// Android camera white balance modes
+ +enum { CAP_ANDROID_WHITE_BALANCE_AUTO             = 0,
+ +       CAP_ANDROID_WHITE_BALANCE_CLOUDY_DAYLIGHT  = 1,
+ +       CAP_ANDROID_WHITE_BALANCE_DAYLIGHT         = 2,
+ +       CAP_ANDROID_WHITE_BALANCE_FLUORESCENT      = 3,
+ +       CAP_ANDROID_WHITE_BALANCE_INCANDESCENT     = 4,
+ +       CAP_ANDROID_WHITE_BALANCE_SHADE            = 5,
+ +       CAP_ANDROID_WHITE_BALANCE_TWILIGHT         = 6,
+ +       CAP_ANDROID_WHITE_BALANCE_WARM_FLUORESCENT = 7
+ +     };
+ +
+ +
+ +// Android camera antibanding modes
+ +enum { CAP_ANDROID_ANTIBANDING_50HZ = 0,
+ +       CAP_ANDROID_ANTIBANDING_60HZ = 1,
+ +       CAP_ANDROID_ANTIBANDING_AUTO = 2,
+ +       CAP_ANDROID_ANTIBANDING_OFF  = 3
+ +     };
+ +
+ +
+ +// Properties of cameras available through AVFOUNDATION interface
+ +enum { CAP_PROP_IOS_DEVICE_FOCUS        = 9001,
+ +       CAP_PROP_IOS_DEVICE_EXPOSURE     = 9002,
+ +       CAP_PROP_IOS_DEVICE_FLASH        = 9003,
+ +       CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
+ +       CAP_PROP_IOS_DEVICE_TORCH        = 9005
+ +     };
+ +
+ +
+ +// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
+ +/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
+ +enum { CAP_PROP_GIGA_FRAME_OFFSET_X   = 10001,
+ +       CAP_PROP_GIGA_FRAME_OFFSET_Y   = 10002,
+ +       CAP_PROP_GIGA_FRAME_WIDTH_MAX  = 10003,
+ +       CAP_PROP_GIGA_FRAME_HEIGH_MAX  = 10004,
+ +       CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
+ +       CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
+ +     };
+ +
++enum { CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
++       CAP_PROP_INTELPERC_PROFILE_IDX                 = 11002,
++       CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE  = 11003,
++       CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE      = 11004,
++       CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD  = 11005,
++       CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ     = 11006,
++       CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT     = 11007
++     };
++
++// Intel PerC streams
++enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
++       CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
++       CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR
++     };
++
++enum { CAP_INTELPERC_DEPTH_MAP              = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
++       CAP_INTELPERC_UVDEPTH_MAP            = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
++       CAP_INTELPERC_IR_MAP                 = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
++       CAP_INTELPERC_IMAGE                  = 3
++     };
+ +
+ +class CV_EXPORTS_W VideoCapture
+ +{
+ +public:
+ +    CV_WRAP VideoCapture();
+ +    CV_WRAP VideoCapture(const String& filename);
+ +    CV_WRAP VideoCapture(int device);
+ +
+ +    virtual ~VideoCapture();
+ +    CV_WRAP virtual bool open(const String& filename);
+ +    CV_WRAP virtual bool open(int device);
+ +    CV_WRAP virtual bool isOpened() const;
+ +    CV_WRAP virtual void release();
+ +
+ +    CV_WRAP virtual bool grab();
+ +    CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0);
+ +    virtual VideoCapture& operator >> (CV_OUT Mat& image);
+ +    virtual VideoCapture& operator >> (CV_OUT UMat& image);
+ +    CV_WRAP virtual bool read(OutputArray image);
+ +
+ +    CV_WRAP virtual bool set(int propId, double value);
+ +    CV_WRAP virtual double get(int propId);
+ +
+ +protected:
+ +    Ptr<CvCapture> cap;
+ +};
+ +
+ +
+ +class CV_EXPORTS_W VideoWriter
+ +{
+ +public:
+ +    CV_WRAP VideoWriter();
+ +    CV_WRAP VideoWriter(const String& filename, int fourcc, double fps,
+ +                Size frameSize, bool isColor = true);
+ +
+ +    virtual ~VideoWriter();
+ +    CV_WRAP virtual bool open(const String& filename, int fourcc, double fps,
+ +                      Size frameSize, bool isColor = true);
+ +    CV_WRAP virtual bool isOpened() const;
+ +    CV_WRAP virtual void release();
+ +    virtual VideoWriter& operator << (const Mat& image);
+ +    CV_WRAP virtual void write(const Mat& image);
+ +
+ +    CV_WRAP static int fourcc(char c1, char c2, char c3, char c4);
+ +
+ +protected:
+ +    Ptr<CvVideoWriter> writer;
+ +};
+ +
+ +template<> CV_EXPORTS void DefaultDeleter<CvCapture>::operator ()(CvCapture* obj) const;
+ +template<> CV_EXPORTS void DefaultDeleter<CvVideoWriter>::operator ()(CvVideoWriter* obj) const;
+ +
+ +} // cv
+ +
+ +#endif
diff --cc modules/highgui/include/opencv2/highgui/highgui_c.h
Simple merge
diff --cc modules/highgui/src/cap.cpp
Simple merge
diff --cc modules/highgui/src/precomp.hpp
Simple merge
diff --cc modules/highgui/test/test_precomp.hpp
Simple merge
diff --cc modules/imgproc/src/imgwarp.cpp
Simple merge
diff --cc modules/java/generator/gen_java.py
Simple merge
diff --cc modules/nonfree/src/surf.ocl.cpp

index 5ade5e5,293fd84..8fd717c
--- 1/modules/nonfree/src/surf.ocl.cpp
--- 2/modules/nonfree/src/surf.ocl.cpp
+++ b/modules/nonfree/src/surf.ocl.cpp
@@@ -57,18 -56,25 +58,25 @@@ namespace c
   {
       namespace ocl
       {
- -        static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
+         // The number of degrees between orientation samples in calcOrientation
+         const static int ORI_SEARCH_INC = 5;
+         // The local size of the calcOrientation kernel
+         const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);
+ 
+ +        static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
               size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
           { // obtains the kernel once to query its wavefront size, then executes it with -D WAVE_SIZE appended to the options
 -             char optBuf [100] = {0};
 -             char * optBufPtr = optBuf;
 +             std::stringstream optsStr;
 +             optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " "; // option string starts with the two orientation constants defined above
 +             optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
               cl_kernel kernel;
 -             kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
 +             kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str()); // this kernel handle is only used for the wavefront-size query and is released right after
               size_t wave_size = queryWaveFrontSize(kernel);
               CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
 -             sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
 -             openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
 +             optsStr << "-D WAVE_SIZE=" << wave_size; // appended after the ORI_* options, which end with a separating space
 +             openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
           }
+ 
       }
   }
   
@@@ -601,10 -602,10 +609,10 @@@ void SURF_OCL_Invoker::icvCalcOrientati
       args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
       args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
   
-     size_t localThreads[3]  = {32, 4, 1};
-     size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
+     size_t localThreads[3]  = {ORI_LOCAL_SIZE, 1, 1};
+     size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};
   
- -    openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ +    openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
   }
   
   void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
diff --cc modules/ocl/doc/image_filtering.rst
Simple merge
diff --cc modules/ocl/include/opencv2/ocl.hpp

index 542dbeb,0000000..bc83892

mode 100644,000000..100644
--- 1/modules/ocl/include/opencv2/ocl.hpp
--- /dev/null
+++ b/modules/ocl/include/opencv2/ocl.hpp
@@@ -1,2104 -1,0 +1,2106 @@@
-                 const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +// This file is part of OpenCV project.
+ +// It is subject to the license terms in the LICENSE file found in the top-level directory
+ +// of this distribution and at http://opencv.org/license.html.
+ +
+ +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+ +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+ +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+ +// Third party copyrights are property of their respective owners.
+ +
+ +#ifndef __OPENCV_OCL_HPP__
+ +#define __OPENCV_OCL_HPP__
+ +
+ +#include <memory>
+ +#include <vector>
+ +
+ +#include "opencv2/core.hpp"
+ +#include "opencv2/imgproc.hpp"
+ +#include "opencv2/objdetect.hpp"
+ +#include "opencv2/ml.hpp"
+ +
+ +namespace cv
+ +{
+ +    namespace ocl
+ +    {
+ +        enum DeviceType
+ +        {
+ +            CVCL_DEVICE_TYPE_DEFAULT     = (1 << 0),
+ +            CVCL_DEVICE_TYPE_CPU         = (1 << 1),
+ +            CVCL_DEVICE_TYPE_GPU         = (1 << 2),
+ +            CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3),
+ +            //CVCL_DEVICE_TYPE_CUSTOM      = (1 << 4)
+ +            CVCL_DEVICE_TYPE_ALL         = 0xFFFFFFFF
+ +        };
+ +
+ +        enum DevMemRW
+ +        {
+ +            DEVICE_MEM_R_W = 0,
+ +            DEVICE_MEM_R_ONLY,
+ +            DEVICE_MEM_W_ONLY
+ +        };
+ +
+ +        enum DevMemType
+ +        {
+ +            DEVICE_MEM_DEFAULT = 0,
+ +            DEVICE_MEM_AHP,         //alloc host pointer
+ +            DEVICE_MEM_UHP,         //use host pointer
+ +            DEVICE_MEM_CHP,         //copy host pointer
+ +            DEVICE_MEM_PM           //persistent memory
+ +        };
+ +
+ +        // these classes contain OpenCL runtime information
+ +
+ +        struct PlatformInfo;
+ +
+ +        struct DeviceInfo
+ +        {
+ +        public:
+ +            int _id; // reserved, don't use it
+ +
+ +            DeviceType deviceType;
+ +            std::string deviceProfile;
+ +            std::string deviceVersion;
+ +            std::string deviceName;
+ +            std::string deviceVendor;
+ +            int deviceVendorId;
+ +            std::string deviceDriverVersion;
+ +            std::string deviceExtensions;
+ +
+ +            size_t maxWorkGroupSize;
+ +            std::vector<size_t> maxWorkItemSizes;
+ +            int maxComputeUnits;
+ +            size_t localMemorySize;
+ +            size_t maxMemAllocSize;
+ +
+ +            int deviceVersionMajor;
+ +            int deviceVersionMinor;
+ +
+ +            bool haveDoubleSupport;
+ +            bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
+ +            bool isIntelDevice;
+ +
+ +            std::string compilationExtraOptions;
+ +
+ +            const PlatformInfo* platform;
+ +
+ +            DeviceInfo();
+ +            ~DeviceInfo();
+ +        };
+ +
+ +        struct PlatformInfo
+ +        {
+ +            int _id; // reserved, don't use it
+ +
+ +            std::string platformProfile;
+ +            std::string platformVersion;
+ +            std::string platformName;
+ +            std::string platformVendor;
+ +            std::string platformExtensons;
+ +
+ +            int platformVersionMajor;
+ +            int platformVersionMinor;
+ +
+ +            std::vector<const DeviceInfo*> devices;
+ +
+ +            PlatformInfo();
+ +            ~PlatformInfo();
+ +        };
+ +
+ +        //////////////////////////////// Initialization & Info ////////////////////////
+ +        typedef std::vector<const PlatformInfo*> PlatformsInfo;
+ +
+ +        CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
+ +
+ +        typedef std::vector<const DeviceInfo*> DevicesInfo;
+ +
+ +        CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
+ +                const PlatformInfo* platform = NULL);
+ +
+ +        // set device you want to use
+ +        CV_EXPORTS void setDevice(const DeviceInfo* info);
+ +
+ +        // Initialize from OpenCL handles directly.
+ +        // Argument types are (pointers): cl_platform_id*, cl_context*, cl_device_id*
+ +        CV_EXPORTS void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice);
+ +
+ +        enum FEATURE_TYPE
+ +        {
+ +            FEATURE_CL_DOUBLE = 1,
+ +            FEATURE_CL_UNIFIED_MEM,
+ +            FEATURE_CL_VER_1_2,
+ +            FEATURE_CL_INTEL_DEVICE
+ +        };
+ +
+ +        // Represents OpenCL context, interface
+ +        class CV_EXPORTS Context
+ +        {
+ +        protected:
+ +            Context() { }
+ +            ~Context() { }
+ +        public:
+ +            static Context *getContext();
+ +
+ +            bool supportsFeature(FEATURE_TYPE featureType) const;
+ +            const DeviceInfo& getDeviceInfo() const;
+ +
+ +            const void* getOpenCLContextPtr() const;
+ +            const void* getOpenCLCommandQueuePtr() const;
+ +            const void* getOpenCLDeviceIDPtr() const;
+ +        };
+ +
+ +        inline const void *getClContextPtr()
+ +        {
+ +            return Context::getContext()->getOpenCLContextPtr();
+ +        }
+ +
+ +        inline const void *getClCommandQueuePtr()
+ +        {
+ +            return Context::getContext()->getOpenCLCommandQueuePtr();
+ +        }
+ +
+ +        CV_EXPORTS bool supportsFeature(FEATURE_TYPE featureType);
+ +
+ +        CV_EXPORTS void finish();
+ +
+ +        enum BINARY_CACHE_MODE
+ +        {
+ +            CACHE_NONE    = 0,        // do not cache OpenCL binary
+ +            CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode
+ +            CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode
+ +            CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // cache opencl binary
+ +        };
+ +        //! Enable or disable OpenCL program binary caching onto local disk
+ +        // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
+ +        // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
+ +        // binary file, which will be reused when the OpenCV executable is started again.
+ +        //
+ +        // This feature is enabled by default.
+ +        CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
+ +
+ +        //! sets the location where the binary cache is saved
+ +        CV_EXPORTS void setBinaryPath(const char *path);
+ +
+ +        struct ProgramSource
+ +        {
+ +            const char* name;
+ +            const char* programStr;
+ +            const char* programHash;
+ +
+ +            // Cache in memory by name (should be unique). Caching on disk disabled.
+ +            inline ProgramSource(const char* _name, const char* _programStr)
+ +                : name(_name), programStr(_programStr), programHash(NULL)
+ +            {
+ +            }
+ +
+ +            // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
+ +            inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
+ +                : name(_name), programStr(_programStr), programHash(_programHash)
+ +            {
+ +            }
+ +        };
+ +
+ +        //! Calls OpenCL kernel. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+ +        //! Deprecated, will be replaced
+ +        CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
+ +                const cv::ocl::ProgramSource& source, String kernelName,
+ +                size_t globalThreads[3], size_t localThreads[3],
+ +                std::vector< std::pair<size_t, const void *> > &args,
+ +                int channels, int depth, const char *build_options);
+ +
+ +        class CV_EXPORTS oclMatExpr;
+ +        //////////////////////////////// oclMat ////////////////////////////////
+ +        class CV_EXPORTS oclMat
+ +        {
+ +        public:
+ +            //! default constructor
+ +            oclMat();
+ +            //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ +            oclMat(int rows, int cols, int type);
+ +            oclMat(Size size, int type);
+ +            //! constructs oclMatrix and fills it with the specified value _s.
+ +            oclMat(int rows, int cols, int type, const Scalar &s);
+ +            oclMat(Size size, int type, const Scalar &s);
+ +            //! copy constructor
+ +            oclMat(const oclMat &m);
+ +
+ +            //! constructor for oclMatrix headers pointing to user-allocated data
+ +            oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +            oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
+ +
+ +            //! creates a matrix header for a part of the bigger matrix
+ +            oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
+ +            oclMat(const oclMat &m, const Rect &roi);
+ +
+ +            //! builds oclMat from Mat. Performs a blocking upload to device.
+ +            explicit oclMat (const Mat &m);
+ +
+ +            //! destructor - calls release()
+ +            ~oclMat();
+ +
+ +            //! assignment operators
+ +            oclMat &operator = (const oclMat &m);
+ +            //! assignment operator. Performs a blocking upload to device.
+ +            oclMat &operator = (const Mat &m);
+ +            oclMat &operator = (const oclMatExpr& expr);
+ +
+ +            //! performs a blocking upload of data to oclMat.
+ +            void upload(const cv::Mat &m);
+ +
+ +
+ +            //! downloads data from device to host memory. Blocking calls.
+ +            operator Mat() const;
+ +            void download(cv::Mat &m) const;
+ +
+ +            //! convert to _InputArray
+ +            operator _InputArray();
+ +
+ +            //! convert to _OutputArray
+ +            operator _OutputArray();
+ +
+ +            //! returns a new oclMatrix header for the specified row
+ +            oclMat row(int y) const;
+ +            //! returns a new oclMatrix header for the specified column
+ +            oclMat col(int x) const;
+ +            //! ... for the specified row span
+ +            oclMat rowRange(int startrow, int endrow) const;
+ +            oclMat rowRange(const Range &r) const;
+ +            //! ... for the specified column span
+ +            oclMat colRange(int startcol, int endcol) const;
+ +            oclMat colRange(const Range &r) const;
+ +
+ +            //! returns deep copy of the oclMatrix, i.e. the data is copied
+ +            oclMat clone() const;
+ +
+ +            //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
+ +            // It calls m.create(this->size(), this->type()).
+ +            // It supports any data type
+ +            void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
+ +
+ +            //! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
+ +            void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
+ +
+ +            void assignTo( oclMat &m, int type = -1 ) const;
+ +
+ +            //! sets every oclMatrix element to s
+ +            oclMat& operator = (const Scalar &s);
+ +            //! sets some of the oclMatrix elements to s, according to the mask
+ +            oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
+ +            //! creates alternative oclMatrix header for the same data, with different
+ +            // number of channels and/or different number of rows. see cvReshape.
+ +            oclMat reshape(int cn, int rows = 0) const;
+ +
+ +            //! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
+ +            // previous data is unreferenced if needed.
+ +            void create(int rows, int cols, int type);
+ +            void create(Size size, int type);
+ +
+ +            //! allocates new oclMatrix with specified device memory type.
+ +            void createEx(int rows, int cols, int type,
+ +                          DevMemRW rw_type, DevMemType mem_type);
+ +            void createEx(Size size, int type, DevMemRW rw_type,
+ +                          DevMemType mem_type);
+ +
+ +            //! decreases reference counter;
+ +            // deallocate the data when reference counter reaches 0.
+ +            void release();
+ +
+ +            //! swaps with other smart pointer
+ +            void swap(oclMat &mat);
+ +
+ +            //! locates oclMatrix header within a parent oclMatrix. See below
+ +            void locateROI( Size &wholeSize, Point &ofs ) const;
+ +            //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
+ +            oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ +            //! extracts a rectangular sub-oclMatrix
+ +            // (this is a generalized form of row, rowRange etc.)
+ +            oclMat operator()( Range rowRange, Range colRange ) const;
+ +            oclMat operator()( const Rect &roi ) const;
+ +
+ +            oclMat& operator+=( const oclMat& m );
+ +            oclMat& operator-=( const oclMat& m );
+ +            oclMat& operator*=( const oclMat& m );
+ +            oclMat& operator/=( const oclMat& m );
+ +
+ +            //! returns true if the oclMatrix data is continuous
+ +            // (i.e. when there are no gaps between successive rows).
+ +            // similar to CV_IS_MAT_CONT(cvMat->type)
+ +            bool isContinuous() const;
+ +            //! returns element size in bytes,
+ +            // similar to CV_ELEM_SIZE(cvMat->type)
+ +            size_t elemSize() const;
+ +            //! returns the size of element channel in bytes.
+ +            size_t elemSize1() const;
+ +            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+ +            int type() const;
+ +            //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+ +            //! 3 channels element actually use 4 channel space
+ +            int ocltype() const;
+ +            //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
+ +            int depth() const;
+ +            //! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
+ +            int channels() const;
+ +            //! returns the number of channels; returns 4 for 3-channel elements,
+ +            //! because 3-channel elements actually use 4-channel space
+ +            int oclchannels() const;
+ +            //! returns step/elemSize1()
+ +            size_t step1() const;
+ +            //! returns oclMatrix size:
+ +            // width == number of columns, height == number of rows
+ +            Size size() const;
+ +            //! returns true if oclMatrix data is NULL
+ +            bool empty() const;
+ +
+ +            //! matrix transposition
+ +            oclMat t() const;
+ +
+ +            /*! includes several bit-fields:
+ +              - the magic signature
+ +              - continuity flag
+ +              - depth
+ +              - number of channels
+ +              */
+ +            int flags;
+ +            //! the number of rows and columns
+ +            int rows, cols;
+ +            //! a distance between successive rows in bytes; includes the gap if any
+ +            size_t step;
+ +            //! pointer to the data(OCL memory object)
+ +            uchar *data;
+ +
+ +            //! pointer to the reference counter;
+ +            // when oclMatrix points to user-allocated data, the pointer is NULL
+ +            int *refcount;
+ +
+ +            //! helper fields used in locateROI and adjustROI
+ +            //datastart and dataend are not used in current version
+ +            uchar *datastart;
+ +            uchar *dataend;
+ +
+ +            //! OpenCL context associated with the oclMat object.
+ +            Context *clCxt; // TODO clCtx
+ +            //offset added to handle ROI, measured in bytes
+ +            int offset;
+ +            //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
+ +            int wholerows;
+ +            int wholecols;
+ +        };
+ +
+ +        // convert InputArray/OutputArray to oclMat references
+ +        CV_EXPORTS oclMat& getOclMatRef(InputArray src);
+ +        CV_EXPORTS oclMat& getOclMatRef(OutputArray src);
+ +
+ +        ///////////////////// mat split and merge /////////////////////////////////
+ +        //! Compose a multi-channel array from several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst);
+ +        CV_EXPORTS void merge(const std::vector<oclMat> &src, oclMat &dst);
+ +
+ +        //! Divides multi-channel array into several single-channel arrays
+ +        // Support all types
+ +        CV_EXPORTS void split(const oclMat &src, oclMat *dst);
+ +        CV_EXPORTS void split(const oclMat &src, std::vector<oclMat> &dst);
+ +
+ +        ////////////////////////////// Arithmetics ///////////////////////////////////
+ +
+ +        //! adds one matrix to another with scale (dst = src1 * alpha + src2 * beta + gama)
+ +        // supports all data types
+ +        CV_EXPORTS void addWeighted(const oclMat &src1, double  alpha, const oclMat &src2, double beta, double gama, oclMat &dst);
+ +
+ +        //! adds one matrix to another (dst = src1 + src2)
+ +        // supports all data types
+ +        CV_EXPORTS void add(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! adds scalar to a matrix (dst = src1 + s)
+ +        // supports all data types
+ +        CV_EXPORTS void add(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! subtracts one matrix from another (dst = src1 - src2)
+ +        // supports all data types
+ +        CV_EXPORTS void subtract(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        //! subtracts scalar from a matrix (dst = src1 - s)
+ +        // supports all data types
+ +        CV_EXPORTS void subtract(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! computes element-wise product of the two arrays (dst = src1 * scale * src2)
+ +        // supports all data types
+ +        CV_EXPORTS void multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
+ +        //! multiplies matrix to a number (dst = scalar * src)
+ +        // supports all data types
+ +        CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
+ +
+ +        //! computes element-wise quotient of the two arrays (dst = src1 * scale / src2)
+ +        // supports all data types
+ +        CV_EXPORTS void divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scale = 1);
+ +        //! computes element-wise quotient of the two arrays (dst = scale / src)
+ +        // supports all data types
+ +        CV_EXPORTS void divide(double scale, const oclMat &src1, oclMat &dst);
+ +
+ +        //! computes element-wise minimum of the two arrays (dst = min(src1, src2))
+ +        // supports all data types
+ +        CV_EXPORTS void min(const oclMat &src1, const oclMat &src2, oclMat &dst);
+ +
+ +        //! computes element-wise maximum of the two arrays (dst = max(src1, src2))
+ +        // supports all data types
+ +        CV_EXPORTS void max(const oclMat &src1, const oclMat &src2, oclMat &dst);
+ +
+ +        //! compares elements of two arrays (dst = src1 <cmpop> src2)
+ +        // supports all data types
+ +        CV_EXPORTS void compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop);
+ +
+ +        //! transposes the matrix
+ +        // supports all data types
+ +        CV_EXPORTS void transpose(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes element-wise absolute values of an array (dst = abs(src))
+ +        // supports all data types
+ +        CV_EXPORTS void abs(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2))
+ +        // supports all data types
+ +        CV_EXPORTS void absdiff(const oclMat &src1, const oclMat &src2, oclMat &dst);
+ +        //! computes element-wise absolute difference of array and scalar (dst = abs(src1 - s))
+ +        // supports all data types
+ +        CV_EXPORTS void absdiff(const oclMat &src1, const Scalar &s, oclMat &dst);
+ +
+ +        //! computes mean value and standard deviation of all or selected array elements
+ +        // supports all data types
+ +        CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev);
+ +
+ +        //! computes norm of array
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports all data types
+ +        CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2);
+ +
+ +        //! computes norm of the difference between two arrays
+ +        // supports NORM_INF, NORM_L1, NORM_L2
+ +        // supports all data types
+ +        CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2);
+ +
+ +        //! reverses the order of the rows, columns or both in a matrix
+ +        // supports all types
+ +        CV_EXPORTS void flip(const oclMat &src, oclMat &dst, int flipCode);
+ +
+ +        //! computes sum of array elements
+ +        // support all types
+ +        CV_EXPORTS Scalar sum(const oclMat &m);
+ +        CV_EXPORTS Scalar absSum(const oclMat &m);
+ +        CV_EXPORTS Scalar sqrSum(const oclMat &m);
+ +
+ +        //! finds global minimum and maximum array elements and returns their values
+ +        // support all C1 types
+ +        CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ +
+ +        //! finds global minimum and maximum array elements and returns their values with locations
+ +        // support all C1 types
+ +        CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
+ +                                  const oclMat &mask = oclMat());
+ +
+ +        //! counts non-zero array elements
+ +        // support all types
+ +        CV_EXPORTS int countNonZero(const oclMat &src);
+ +
+ +        //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
+ +        // destination array will have the depth type as lut and the same channels number as source
+ +        //It supports 8UC1 8UC4 only
+ +        CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst);
+ +
+ +        //! only 8UC1 and 256 bins are supported now
+ +        CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
+ +        //! only 8UC1 and 256 bins are supported now
+ +        CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+ +
+ +        //! only 8UC1 is supported now
+ +        CV_EXPORTS Ptr<cv::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+ +
+ +        //! bilateralFilter
+ +        // supports 8UC1 8UC4
+ +        CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
+ +
+ +        //! Applies an adaptive bilateral filter to the input image
+ +        //  Unlike the usual bilateral filter that uses a fixed value for sigmaColor,
+ +        //  the adaptive version calculates the local variance in the ksize neighborhood
+ +        //  and uses this as sigmaColor for the value filtering. However, the local standard deviation is
+ +        //  clamped to the maxSigmaColor.
+ +        //  supports 8UC1, 8UC3
+ +        CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor=20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
+ +
+ +        //! computes exponent of each matrix element (dst = e**src)
+ +        // supports only CV_32FC1, CV_64FC1 type
+ +        CV_EXPORTS void exp(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes natural logarithm of absolute value of each matrix element: dst = log(abs(src))
+ +        // supports only CV_32FC1, CV_64FC1 type
+ +        CV_EXPORTS void log(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes square root of each matrix element
+ +        // supports only CV_32FC1, CV_64FC1 type
+ +        CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst);
+ +
+ +        //! computes magnitude of each (x(i), y(i)) vector
+ +        // supports only CV_32F, CV_64F type
+ +        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
+ +
+ +        //! computes angle (angle(i)) of each (x(i), y(i)) vector
+ +        // supports only CV_32F, CV_64F type
+ +        CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! the function raises every element of the input array to p
+ +        // support only CV_32F, CV_64F type
+ +        CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y);
+ +
+ +        //! converts Cartesian coordinates to polar
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false);
+ +
+ +        //! converts polar coordinates to Cartesian
+ +        // supports only CV_32F CV_64F type
+ +        CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false);
+ +
+ +        //! performs per-element bit-wise inversion
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst);
+ +
+ +        //! calculates per-element bit-wise disjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! calculates per-element bit-wise conjunction of two arrays
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! calculates per-element bit-wise "exclusive or" operation
+ +        // supports all types
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
+ +        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
+ +
+ +        //! Logical operators
+ +        CV_EXPORTS oclMat operator ~ (const oclMat &);
+ +        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+ +        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+ +
+ +
+ +        //! Mathematics operators
+ +        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
+ +        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
+ +
+ +        struct CV_EXPORTS ConvolveBuf
+ +        {
+ +            Size result_size;
+ +            Size block_size;
+ +            Size user_block_size;
+ +            Size dft_size;
+ +
+ +            oclMat image_spect, templ_spect, result_spect;
+ +            oclMat image_block, templ_block, result_data;
+ +
+ +            void create(Size image_size, Size templ_size);
+ +            static Size estimateBlockSize(Size result_size, Size templ_size);
+ +        };
+ +
+ +        //! computes convolution of two images, may use discrete Fourier transform
+ +        // support only CV_32FC1 type
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
+ +        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
+ +
+ +        //! Performs a per-element multiplication of two Fourier spectrums.
+ +        //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
+ +        //! support only CV_32FC2 type
+ +        CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
+ +
+ +        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0);
+ +
+ +        //! initializes a scaled identity matrix
+ +        CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1));
+ +
+ +        //! fills the output array with repeated copies of the input array
+ +        CV_EXPORTS void repeat(const oclMat & src, int ny, int nx, oclMat & dst);
+ +
+ +        //////////////////////////////// Filter Engine ////////////////////////////////
+ +
+ +        /*!
+ +          The Base Class for 1D or Row-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process 1D data.
+ +          In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseRowFilter_GPU
+ +        {
+ +        public:
+ +            BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseRowFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Column-wise Filters
+ +
+ +          This is the base class for linear or non-linear filters that process columns of 2D arrays.
+ +          Such filters are used for the "vertical" filtering parts in separable filters.
+ +          */
+ +        class CV_EXPORTS BaseColumnFilter_GPU
+ +        {
+ +        public:
+ +            BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ +            virtual ~BaseColumnFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            int ksize, anchor, bordertype;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Non-Separable 2D Filters.
+ +
+ +          This is the base class for linear or non-linear 2D filters.
+ +          */
+ +        class CV_EXPORTS BaseFilter_GPU
+ +        {
+ +        public:
+ +            BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ +                : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ +            virtual ~BaseFilter_GPU() {}
+ +            virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ +            Size ksize;
+ +            Point anchor;
+ +            int borderType;
+ +        };
+ +
+ +        /*!
+ +          The Base Class for Filter Engine.
+ +
+ +          The class can be used to apply an arbitrary filtering operation to an image.
+ +          It contains all the necessary intermediate buffers.
+ +          */
+ +        class CV_EXPORTS FilterEngine_GPU
+ +        {
+ +        public:
+ +            virtual ~FilterEngine_GPU() {}
+ +
+ +            virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ +        };
+ +
+ +        //! returns the non-separable filter engine with the specified filter
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);
+ +
+ +        //! returns the primitive row filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! returns the primitive column filter with the specified kernel
+ +        CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel,
+ +                int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0);
+ +
+ +        //! returns the separable linear filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
-         CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
++                const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
+ +
+ +        //! returns the separable filter engine with the specified filters
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ +                const Ptr<BaseColumnFilter_GPU> &columnFilter);
+ +
+ +        //! returns the Gaussian filter engine
-         CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
++        CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
+ +
+ +        //! returns filter engine for the generalized Sobel operator
++        CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
+ +
+ +        //! applies Laplacian operator to the image
+ +        // supports only ksize = 1 and ksize = 3
+ +        CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1,
+ +                double delta=0, int borderType=BORDER_DEFAULT);
+ +
+ +        //! returns 2D box filter
+ +        // dst type must be the same as source type
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType,
+ +                const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns box filter engine
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size &ksize,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D filter with the specified kernel
+ +        // supports: dst type must be the same as source type
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns the non-separable linear filter engine
+ +        // supports: dst type must be the same as source type
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using the normalized box filter
+ +        CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
+ +                                  Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ +
+ +        //! returns 2D morphological filter
+ +        //! only MORPH_ERODE and MORPH_DILATE are supported
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        // kernel must have CV_8UC1 type, a single row, and cols == ksize.width * ksize.height
+ +        CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize,
+ +                Point anchor = Point(-1, -1));
+ +
+ +        //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+ +        CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat &kernel,
+ +                const Point &anchor = Point(-1, -1), int iterations = 1);
+ +
+ +        //! a synonym for normalized box filter
+ +        static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
+ +                                int borderType = BORDER_CONSTANT)
+ +        {
+ +            boxFilter(src, dst, -1, ksize, anchor, borderType);
+ +        }
+ +
+ +        //! applies non-separable 2D linear filter to the image
+ +        CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
+ +                                 Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT);
+ +
+ +        //! applies separable 2D linear filter to the image
+ +        CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
+ +                                    Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies generalized Sobel operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! applies the vertical or horizontal Scharr operator to the image
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! smooths the image using Gaussian filter.
+ +        // dst.type must equal src.type
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
+ +        CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
+ +
+ +        //! erodes the image (applies the local minimum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                               int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! dilates the image (applies the local maximum operator)
+ +        // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
+ +        CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        //! applies an advanced morphological operation to the image
+ +        CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+ +
+ +                                      int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+ +
+ +
+ +        ////////////////////////////// Image processing //////////////////////////////
+ +        //! Does mean shift filtering on GPU.
+ +        CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
+ +                                           TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift procedure on GPU.
+ +        CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
+ +                                      TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! Does mean shift segmentation with elimination of small regions.
+ +        CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
+ +                                              TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ +
+ +        //! applies fixed threshold to the image.
+ +        // supports CV_8UC1 and CV_32FC1 data type
+ +        // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV
+ +        CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC);
+ +
+ +        //! resizes the image
+ +        // Supports INTER_NEAREST, INTER_LINEAR
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
+ +
+ +        //! Applies a generic geometrical transformation to an image.
+ +
+ +        // Supports INTER_NEAREST, INTER_LINEAR.
+ +        // Map1 supports CV_16SC2, CV_32FC2  types.
+ +        // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+ +        CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+ +
+ +        //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
+ +        // supports CV_8UC1, CV_8UC4, CV_32SC1 types
+ +        CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
+ +
+ +        //! Smoothes image using median filter
+ +        // The source 1- or 4-channel image. m should be 3 or 5, the image depth should be CV_8U or CV_32F.
+ +        CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m);
+ +
+ +        //! warps the image using affine transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! warps the image using perspective transformation
+ +        // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
+ +        // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
+ +        CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
+ +
+ +        //! computes the integral image and integral for the squared image
+ +        // sum supports CV_32S and CV_32F; sqsum supports CV_32F and CV_64F
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
+ +        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
+ +        CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ +            int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ +        CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ +            int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ +
+ +
+ +        /////////////////////////////////// ML ///////////////////////////////////////////
+ +
+ +        //! Computes the closest center for each line in source and labels it with that center's index
+ +        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
+ +        // supports NORM_L1 and NORM_L2 distType
+ +        // if indices is provided, only the indexed rows will be calculated and their results are in the same
+ +        // order of indices
+ +        CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
+ +
+ +        //!Does k-means procedure on GPU
+ +        // supports CV_32FC1/CV_32FC2/CV_32FC4 data type
+ +        CV_EXPORTS double kmeans(const oclMat &src, int K, oclMat &bestLabels,
+ +                                     TermCriteria criteria, int attemps, int flags, oclMat &centers);
+ +
+ +
+ +        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
+ +        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ +        class CV_EXPORTS OclCascadeClassifier : public  cv::CascadeClassifier
+ +        {
+ +        public:
+ +            void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
+ +                double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
+ +                Size minSize = Size(), Size maxSize = Size());
+ +        };
+ +
+ +        /////////////////////////////// Pyramid /////////////////////////////////////
+ +        CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+ +
+ +        //! upsamples the source image and then smoothes it
+ +        CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);
+ +
+ +        //! performs linear blending of two images
+ +        //! to avoid accuracy errors sum of weights shouldn't be very close to zero
+ +        // supports only CV_8UC1 source type
+ +        CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+ +
+ +        //! computes vertical sum, supports only CV_32FC1 images
+ +        CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+ +
+ +        ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ +        struct CV_EXPORTS MatchTemplateBuf
+ +        {
+ +            Size user_block_size;
+ +            oclMat imagef, templf;
+ +            std::vector<oclMat> images;
+ +            std::vector<oclMat> image_sums;
+ +            std::vector<oclMat> image_sqsums;
+ +        };
+ +
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+ +
+ +        //! computes the proximity map for the raster template and the image where the template is searched for
+ +        // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ +        // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ +        CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+ +
+ +
+ +
+ +        ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+ +        struct CV_EXPORTS CannyBuf;
+ +
+ +        //! compute edges of the input image using Canny operator
+ +        // Support CV_8UC1 only
+ +        CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +        CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+ +
+ +        struct CV_EXPORTS CannyBuf
+ +        {
+ +            CannyBuf() : counter(1, 1, CV_32S) { }
+ +            ~CannyBuf()
+ +            {
+ +                release();
+ +            }
+ +            explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(1, 1, CV_32S)
+ +            {
+ +                create(image_size, apperture_size);
+ +            }
+ +            CannyBuf(const oclMat &dx_, const oclMat &dy_);
+ +            void create(const Size &image_size, int apperture_size = 3);
+ +            void release();
+ +
+ +            oclMat dx, dy;
+ +            oclMat dx_buf, dy_buf;
+ +            oclMat magBuf, mapBuf;
+ +            oclMat trackBuf1, trackBuf2;
+ +            oclMat counter;
+ +            Ptr<FilterEngine_GPU> filterDX, filterDY;
+ +        };
+ +
+ +        ///////////////////////////////////////// Hough Transform /////////////////////////////////////////
+ +        //! HoughCircles
+ +        struct HoughCirclesBuf
+ +        {
+ +            oclMat edges;
+ +            oclMat accum;
+ +            oclMat srcPoints;
+ +            oclMat centers;
+ +            CannyBuf cannyBuf;
+ +        };
+ +
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
+ +        CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);
+ +
+ +
+ +        ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ +        //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ +        //! Param dft_size is the size of DFT transform.
+ +        //!
+ +        //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ +        // support src type of CV_32FC1, CV_32FC2
+ +        // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ +        // dft_size is the size of original input, which is used for transformation from complex to real.
+ +        // dft_size must be powers of 2, 3 and 5
+ +        // real to complex dft requires at least v1.8 clAmdFft
+ +        // real to complex dft output is not the same as the CPU version
+ +        // real to complex and complex to real does not support DFT_ROWS
+ +        CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(), int flags = 0);
+ +
+ +        //! implements generalized matrix product algorithm GEMM from BLAS
+ +        // The functionality requires clAmdBlas library
+ +        // only support type CV_32FC1
+ +        // flag GEMM_3_T is not supported
+ +        CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ +                             const oclMat &src3, double beta, oclMat &dst, int flags = 0);
+ +
+ +        //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+ +
+ +        struct CV_EXPORTS HOGDescriptor
+ +
+ +        {
+ +
+ +            enum { DEFAULT_WIN_SIGMA = -1 };
+ +
+ +            enum { DEFAULT_NLEVELS = 64 };
+ +
+ +            enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+ +
+ +
+ +
+ +            HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+ +
+ +                          Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+ +
+ +                          int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+ +
+ +                          double threshold_L2hys = 0.2, bool gamma_correction = true,
+ +
+ +                          int nlevels = DEFAULT_NLEVELS);
+ +
+ +
+ +
+ +            size_t getDescriptorSize() const;
+ +
+ +            size_t getBlockHistogramSize() const;
+ +
+ +
+ +
+ +            void setSVMDetector(const std::vector<float> &detector);
+ +
+ +
+ +
+ +            static std::vector<float> getDefaultPeopleDetector();
+ +
+ +            static std::vector<float> getPeopleDetector48x96();
+ +
+ +            static std::vector<float> getPeopleDetector64x128();
+ +
+ +
+ +
+ +            void detect(const oclMat &img, std::vector<Point> &found_locations,
+ +
+ +                        double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                        Size padding = Size());
+ +
+ +
+ +
+ +            void detectMultiScale(const oclMat &img, std::vector<Rect> &found_locations,
+ +
+ +                                  double hit_threshold = 0, Size win_stride = Size(),
+ +
+ +                                  Size padding = Size(), double scale0 = 1.05,
+ +
+ +                                  int group_threshold = 2);
+ +
+ +
+ +
+ +            void getDescriptors(const oclMat &img, Size win_stride,
+ +
+ +                                oclMat &descriptors,
+ +
+ +                                int descr_format = DESCR_FORMAT_COL_BY_COL);
+ +
+ +
+ +
+ +            Size win_size;
+ +
+ +            Size block_size;
+ +
+ +            Size block_stride;
+ +
+ +            Size cell_size;
+ +
+ +            int nbins;
+ +
+ +            double win_sigma;
+ +
+ +            double threshold_L2hys;
+ +
+ +            bool gamma_correction;
+ +
+ +            int nlevels;
+ +
+ +
+ +
+ +        protected:
+ +
+ +            // initialize buffers; only need to do once in case of multiscale detection
+ +
+ +            void init_buffer(const oclMat &img, Size win_stride);
+ +
+ +
+ +
+ +            void computeBlockHistograms(const oclMat &img);
+ +
+ +            void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+ +
+ +
+ +
+ +            double getWinSigma() const;
+ +
+ +            bool checkDetectorSize() const;
+ +
+ +
+ +
+ +            static int numPartsWithin(int size, int part_size, int stride);
+ +
+ +            static Size numPartsWithin(Size size, Size part_size, Size stride);
+ +
+ +
+ +
+ +            // Coefficients of the separating plane
+ +
+ +            float free_coef;
+ +
+ +            oclMat detector;
+ +
+ +
+ +
+ +            // Results of the last classification step
+ +
+ +            oclMat labels;
+ +
+ +            Mat labels_host;
+ +
+ +
+ +
+ +            // Results of the last histogram evaluation step
+ +
+ +            oclMat block_hists;
+ +
+ +
+ +
+ +            // Gradients computation results
+ +
+ +            oclMat grad, qangle;
+ +
+ +
+ +
+ +            // scaled image
+ +
+ +            oclMat image_scale;
+ +
+ +
+ +
+ +            // effect size of input image (might be different from original size after scaling)
+ +
+ +            Size effect_size;
+ +
+ +        };
+ +
+ +
+ +        ////////////////////////feature2d_ocl/////////////////
+ +        /****************************************************************************************\
+ +        *                                      Distance                                          *
+ +        \****************************************************************************************/
+ +        template<typename T>
+ +        struct CV_EXPORTS Accumulator
+ +        {
+ +            typedef T Type;
+ +        };
+ +        template<> struct Accumulator<unsigned char>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<unsigned short>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<char>
+ +        {
+ +            typedef float Type;
+ +        };
+ +        template<> struct Accumulator<short>
+ +        {
+ +            typedef float Type;
+ +        };
+ +
+ +        /*
+ +         * Manhattan distance (city block distance) functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L1
+ +        {
+ +            enum { normType = NORM_L1 };
+ +            typedef T ValueType;
+ +            typedef typename Accumulator<T>::Type ResultType;
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return normL1<ValueType, ResultType>(a, b, size);
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Euclidean distance functor
+ +         */
+ +        template<class T>
+ +        struct CV_EXPORTS L2
+ +        {
+ +            enum { normType = NORM_L2 };
+ +            typedef T ValueType;
+ +            typedef typename Accumulator<T>::Type ResultType;
+ +
+ +            ResultType operator()( const T *a, const T *b, int size ) const
+ +            {
+ +                return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+ +            }
+ +        };
+ +
+ +        /*
+ +         * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ +         * bit count of A exclusive XOR'ed with B
+ +         */
+ +        struct CV_EXPORTS Hamming
+ +        {
+ +            enum { normType = NORM_HAMMING };
+ +            typedef unsigned char ValueType;
+ +            typedef int ResultType;
+ +
+ +            /** this will count the bits in a ^ b
+ +             */
+ +            ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ +            {
+ +                return normHamming(a, b, size);
+ +            }
+ +        };
+ +
+ +        ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+ +
+ +        class CV_EXPORTS BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            enum DistType {L1Dist = 0, L2Dist, HammingDist};
+ +            explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+ +
+ +            // Add descriptors to train descriptor collection
+ +            void add(const std::vector<oclMat> &descCollection);
+ +
+ +            // Get train descriptors collection
+ +            const std::vector<oclMat> &getTrainDescriptors() const;
+ +
+ +            // Clear train descriptors collection
+ +            void clear();
+ +
+ +            // Return true if there are no train descriptors in the collection
+ +            bool empty() const;
+ +
+ +            // Return true if the matcher supports mask in match methods
+ +            bool isMaskSupported() const;
+ +
+ +            // Find one best match for each query descriptor
+ +            void matchSingle(const oclMat &query, const oclMat &train,
+ +                             oclMat &trainIdx, oclMat &distance,
+ +                             const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx and distance and convert it to CPU vector with DMatch
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ +            // Convert trainIdx and distance to vector with DMatch
+ +            static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
+ +            // Find one best match for each query descriptor
+ +            void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+ +
+ +            // Make gpu collection of trains and masks in suitable format for matchCollection function
+ +            void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Find one best match from train collection for each query descriptor
+ +            void matchCollection(const oclMat &query, const oclMat &trainCollection,
+ +                                 oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ +                                 const oclMat &masks = oclMat());
+ +
+ +            // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+ +            static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
+ +            // Convert trainIdx, imgIdx and distance to vector with DMatch
+ +            static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+ +
+ +            // Find one best match from train collection for each query descriptor.
+ +            void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Find k best matches for each query descriptor (in increasing order of distances)
+ +            void knnMatchSingle(const oclMat &query, const oclMat &train,
+ +                                oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
+ +                                const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx and distance and convert it to vector with DMatch
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx and distance to vector with DMatch
+ +            static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
+ +                                        std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find k best matches for each query descriptor (in increasing order of distances).
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            void knnMatch(const oclMat &query, const oclMat &train,
+ +                          std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
+ +                          bool compactResult = false);
+ +
+ +            // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+ +            void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+ +                                     oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ +                                     const oclMat &maskCollection = oclMat());
+ +
+ +            // Download trainIdx, imgIdx and distance and convert them to vector with DMatch
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+ +                                          std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx, imgIdx and distance to vector with DMatch
+ +            static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+ +                                         std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find k best matches  for each query descriptor (in increasing order of distances).
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
+ +                          const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
+ +            // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+ +            // Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
+ +            // because it didn't have enough memory.
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+ +            // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ +            // Matches are not sorted.
+ +            void radiusMatchSingle(const oclMat &query, const oclMat &train,
+ +                                   oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ +                                   const oclMat &mask = oclMat());
+ +
+ +            // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+ +            // matches will be sorted in increasing order of distances.
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx, nMatches and distance to vector with DMatch.
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance
+ +            // (in increasing order of distances).
+ +            void radiusMatch(const oclMat &query, const oclMat &train,
+ +                             std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ +                             const oclMat &mask = oclMat(), bool compactResult = false);
+ +
+ +            // Find best matches for each query descriptor which have distance less than maxDistance.
+ +            // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+ +            // otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches.
+ +            // Matches are not sorted.
+ +            void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+ +                                       const std::vector<oclMat> &masks = std::vector<oclMat>());
+ +
+ +            // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+ +            // matches will be sorted in increasing order of distances.
+ +            // compactResult is used when mask is not empty. If compactResult is false matches
+ +            // vector will have the same size as queryDescriptors rows. If compactResult is true
+ +            // matches vector will not contain matches for fully masked out query descriptors.
+ +            static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
+ +                                            std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +            // Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
+ +            static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+ +                                           std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+ +
+ +            // Find best matches from train collection for each query descriptor which have distance less than
+ +            // maxDistance (in increasing order of distances).
+ +            void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
+ +                             const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+ +
+ +            DistType distType;
+ +
+ +        private:
+ +            std::vector<oclMat> trainDescCollection;
+ +        };
+ +
+ +        template <class Distance>
+ +        class CV_EXPORTS BruteForceMatcher_OCL;
+ +
+ +        template <typename T>
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
+ +            explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
+ +        };
+ +        template <typename T>
+ +        class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
+ +            explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
+ +        };
+ +        template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
+ +            explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
+ +        };
+ +
+ +        class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
+ +        {
+ +        public:
+ +            explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} // NORM_L1 -> L1Dist, NORM_L2 -> L2Dist, any other norm value -> HammingDist
+ +        };
+ +
+ +        class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
+ +        {
+ +        public:
+ +            explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ +                int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+ +
+ +            //! return 1 rows matrix with CV_32FC2 type
+ +            void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ +            //! download points of type Point2f to a vector. the vector's content will be erased
+ +            void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
+ +
+ +            int maxCorners;
+ +            double qualityLevel;
+ +            double minDistance;
+ +
+ +            int blockSize;
+ +            bool useHarrisDetector;
+ +            double harrisK;
+ +            void releaseMemory()
+ +            {
+ +                Dx_.release();
+ +                Dy_.release();
+ +                eig_.release();
+ +                minMaxbuf_.release();
+ +                tmpCorners_.release();
+ +            }
+ +        private:
+ +            oclMat Dx_;
+ +            oclMat Dy_;
+ +            oclMat eig_;
++            oclMat eig_minmax_;
+ +            oclMat minMaxbuf_;
+ +            oclMat tmpCorners_;
++            oclMat counter_;
+ +        };
+ +
+ +        inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ +            int blockSize_, bool useHarrisDetector_, double harrisK_)
+ +        {
+ +            maxCorners = maxCorners_;
+ +            qualityLevel = qualityLevel_;
+ +            minDistance = minDistance_;
+ +            blockSize = blockSize_;
+ +            useHarrisDetector = useHarrisDetector_;
+ +            harrisK = harrisK_;
+ +        }
+ +
+ +        ////////////////////////////////// FAST Feature Detector //////////////////////////////////
+ +        class CV_EXPORTS FAST_OCL
+ +        {
+ +        public:
+ +            enum
+ +            {
+ +                X_ROW = 0,
+ +                Y_ROW,
+ +                RESPONSE_ROW,
+ +                ROWS_COUNT
+ +            };
+ +
+ +            // all features have same size
+ +            static const int FEATURE_SIZE = 7;
+ +
+ +            explicit FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
+ +
+ +            //! finds the keypoints using FAST detector
+ +            //! supports only CV_8UC1 images
+ +            void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
+ +            void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
+ +
+ +            //! download keypoints from device to host memory
+ +            static void downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
+ +
+ +            //! convert keypoints to KeyPoint vector
+ +            static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
+ +
+ +            //! release temporary buffer's memory
+ +            void release();
+ +
+ +            bool nonmaxSupression;
+ +
+ +            int threshold;
+ +
+ +            //! max keypoints = keypointsRatio * img.size().area()
+ +            double keypointsRatio;
+ +
+ +            //! finds keypoints and computes their response if nonmaxSupression is true
+ +            //! returns the count of detected keypoints
+ +            int calcKeyPointsLocation(const oclMat& image, const oclMat& mask);
+ +
+ +            //! gets the final array of keypoints
+ +            //! performs non-max suppression if needed
+ +            //! returns the final count of keypoints
+ +            int getKeyPoints(oclMat& keypoints);
+ +
+ +        private:
+ +            oclMat kpLoc_;
+ +            int count_;
+ +
+ +            oclMat score_;
+ +
+ +            oclMat d_keypoints_;
+ +
+ +            int calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints);
+ +            int nonmaxSupressionOCL(oclMat& keypoints);
+ +        };
+ +
+ +        /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ +
+ +        class CV_EXPORTS PyrLKOpticalFlow
+ +        {
+ +        public:
+ +            PyrLKOpticalFlow()
+ +            {
+ +                winSize = Size(21, 21);
+ +                maxLevel = 3;
+ +                iters = 30;
+ +                derivLambda = 0.5;
+ +                useInitialFlow = false;
+ +                minEigThreshold = 1e-4f;
+ +                getMinEigenVals = false;
+ +                isDeviceArch11_ = false;
+ +            }
+ +
+ +            void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
+ +                        oclMat &status, oclMat *err = 0);
+ +
+ +            void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
+ +
+ +            Size winSize;
+ +            int maxLevel;
+ +            int iters;
+ +            double derivLambda;
+ +            bool useInitialFlow;
+ +            float minEigThreshold;
+ +            bool getMinEigenVals;
+ +
+ +            void releaseMemory()
+ +            {
+ +                dx_calcBuf_.release();
+ +                dy_calcBuf_.release();
+ +
+ +                prevPyr_.clear();
+ +                nextPyr_.clear();
+ +
+ +                dx_buf_.release();
+ +                dy_buf_.release();
+ +            }
+ +
+ +        private:
+ +            void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+ +
+ +            void buildImagePyramid(const oclMat &img0, std::vector<oclMat> &pyr, bool withBorder);
+ +
+ +            oclMat dx_calcBuf_;
+ +            oclMat dy_calcBuf_;
+ +
+ +            std::vector<oclMat> prevPyr_;
+ +            std::vector<oclMat> nextPyr_;
+ +
+ +            oclMat dx_buf_;
+ +            oclMat dy_buf_;
+ +
+ +            oclMat uPyr_[2];
+ +            oclMat vPyr_[2];
+ +
+ +            bool isDeviceArch11_;
+ +        };
+ +
+ +        class CV_EXPORTS FarnebackOpticalFlow
+ +        {
+ +        public:
+ +            FarnebackOpticalFlow();
+ +
+ +            int numLevels;
+ +            double pyrScale;
+ +            bool fastPyramids;
+ +            int winSize;
+ +            int numIters;
+ +            int polyN;
+ +            double polySigma;
+ +            int flags;
+ +
+ +            void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
+ +
+ +            void releaseMemory();
+ +
+ +        private:
+ +            void prepareGaussian(
+ +                int n, double sigma, float *g, float *xg, float *xxg,
+ +                double &ig11, double &ig03, double &ig33, double &ig55);
+ +
+ +            void setPolynomialExpansionConsts(int n, double sigma);
+ +
+ +            void updateFlow_boxFilter(
+ +                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+ +                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+ +
+ +            void updateFlow_gaussianBlur(
+ +                const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+ +                oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+ +
+ +            oclMat frames_[2];
+ +            oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+ +            std::vector<oclMat> pyramid0_, pyramid1_;
+ +        };
+ +
+ +        //////////////// build warping maps ////////////////////
+ +        //! builds plane warping maps
+ +        CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds cylindrical warping maps
+ +        CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds spherical warping maps
+ +        CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);
+ +        //! builds Affine warping maps
+ +        CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        //! builds Perspective warping maps
+ +        CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+ +
+ +        ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ +        //! Interpolate frames (images) using provided optical flow (displacement field).
+ +        //! frame0   - frame 0 (32-bit floating point images, single channel)
+ +        //! frame1   - frame 1 (the same type and size)
+ +        //! fu       - forward horizontal displacement
+ +        //! fv       - forward vertical displacement
+ +        //! bu       - backward horizontal displacement
+ +        //! bv       - backward vertical displacement
+ +        //! pos      - new frame position
+ +        //! newFrame - new frame
+ +        //! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
+ +        //!            occlusion masks            0, occlusion masks            1,
+ +        //!            interpolated forward flow  0, interpolated forward flow  1,
+ +        //!            interpolated backward flow 0, interpolated backward flow 1
+ +        //!
+ +        CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ +                                          const oclMat &fu, const oclMat &fv,
+ +                                          const oclMat &bu, const oclMat &bv,
+ +                                          float pos, oclMat &newFrame, oclMat &buf);
+ +
+ +        //! computes moments of the rasterized shape or a vector of points
+ +        //! contour should be a vector of points describing the contour
+ +        CV_EXPORTS Moments ocl_moments(InputArray contour);
+ +        //! src should be a general image uploaded to the GPU.
+ +        //! the supported oclMat type are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1
+ +        //! to use type of CV_64FC1, the GPU should support CV_64FC1
+ +        CV_EXPORTS Moments ocl_moments(oclMat& src, bool binary);
+ +
+ +        class CV_EXPORTS StereoBM_OCL
+ +        {
+ +        public:
+ +            enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
+ +
+ +            enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
+ +
+ +            //! the default constructor
+ +            StereoBM_OCL();
+ +            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be a multiple of 8.
+ +            StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
+ +
+ +            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+ +            //! Output disparity has CV_8U type.
+ +            void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
+ +
+ +            //! Some heuristics that try to estimate
+ +            // if the current GPU will be faster than the CPU in this algorithm.
+ +            // It queries current active device.
+ +            static bool checkIfGpuCallReasonable();
+ +
+ +            int preset;
+ +            int ndisp;
+ +            int winSize;
+ +
+ +            // If avergeTexThreshold == 0 => post-processing is disabled
+ +            // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for the left image
+ +            // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
+ +            // i.e. the input left image is low textured.
+ +            float avergeTexThreshold;
+ +        private:
+ +            oclMat minSSD, leBuf, riBuf;
+ +        };
+ +
+ +        class CV_EXPORTS StereoBeliefPropagation
+ +        {
+ +        public:
+ +            enum { DEFAULT_NDISP  = 64 };
+ +            enum { DEFAULT_ITERS  = 5  };
+ +            enum { DEFAULT_LEVELS = 5  };
+ +            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
+ +            explicit StereoBeliefPropagation(int ndisp  = DEFAULT_NDISP,
+ +                                             int iters  = DEFAULT_ITERS,
+ +                                             int levels = DEFAULT_LEVELS,
+ +                                             int msg_type = CV_16S);
+ +            StereoBeliefPropagation(int ndisp, int iters, int levels,
+ +                                    float max_data_term, float data_weight,
+ +                                    float max_disc_term, float disc_single_jump,
+ +                                    int msg_type = CV_32F);
+ +            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ +            void operator()(const oclMat &data, oclMat &disparity);
+ +            int ndisp;
+ +            int iters;
+ +            int levels;
+ +            float max_data_term;
+ +            float data_weight;
+ +            float max_disc_term;
+ +            float disc_single_jump;
+ +            int msg_type;
+ +        private:
+ +            oclMat u, d, l, r, u2, d2, l2, r2;
+ +            std::vector<oclMat> datas;
+ +            oclMat out;
+ +        };
+ +
+ +        class CV_EXPORTS StereoConstantSpaceBP
+ +        {
+ +        public:
+ +            enum { DEFAULT_NDISP    = 128 };
+ +            enum { DEFAULT_ITERS    = 8   };
+ +            enum { DEFAULT_LEVELS   = 4   };
+ +            enum { DEFAULT_NR_PLANE = 4   };
+ +            static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
+ +            explicit StereoConstantSpaceBP(
+ +                int ndisp    = DEFAULT_NDISP,
+ +                int iters    = DEFAULT_ITERS,
+ +                int levels   = DEFAULT_LEVELS,
+ +                int nr_plane = DEFAULT_NR_PLANE,
+ +                int msg_type = CV_32F);
+ +            StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
+ +                float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
+ +                int min_disp_th = 0,
+ +                int msg_type = CV_32F);
+ +            void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
+ +            int ndisp;
+ +            int iters;
+ +            int levels;
+ +            int nr_plane;
+ +            float max_data_term;
+ +            float data_weight;
+ +            float max_disc_term;
+ +            float disc_single_jump;
+ +            int min_disp_th;
+ +            int msg_type;
+ +            bool use_local_init_data_cost;
+ +        private:
+ +            oclMat u[2], d[2], l[2], r[2];
+ +            oclMat disp_selected_pyr[2];
+ +            oclMat data_cost;
+ +            oclMat data_cost_selected;
+ +            oclMat temp;
+ +            oclMat out;
+ +        };
+ +
+ +        // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+ +        //
+ +        // see reference:
+ +        //   [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+ +        //   [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+ +        class CV_EXPORTS OpticalFlowDual_TVL1_OCL
+ +        {
+ +        public:
+ +            OpticalFlowDual_TVL1_OCL();
+ +
+ +            void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
+ +
+ +            void collectGarbage();
+ +
+ +            /**
+ +            * Time step of the numerical scheme.
+ +            */
+ +            double tau;
+ +
+ +            /**
+ +            * Weight parameter for the data term, attachment parameter.
+ +            * This is the most relevant parameter, which determines the smoothness of the output.
+ +            * The smaller this parameter is, the smoother the solutions we obtain.
+ +            * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+ +            */
+ +            double lambda;
+ +
+ +            /**
+ +            * Weight parameter for (u - v)^2, tightness parameter.
+ +            * It serves as a link between the attachment and the regularization terms.
+ +            * In theory, it should have a small value in order to maintain both parts in correspondence.
+ +            * The method is stable for a large range of values of this parameter.
+ +            */
+ +            double theta;
+ +
+ +            /**
+ +            * Number of scales used to create the pyramid of images.
+ +            */
+ +            int nscales;
+ +
+ +            /**
+ +            * Number of warpings per scale.
+ +            * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+ +            * This is a parameter that assures the stability of the method.
+ +            * It also affects the running time, so it is a compromise between speed and accuracy.
+ +            */
+ +            int warps;
+ +
+ +            /**
+ +            * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+ +            * A small value will yield more accurate solutions at the expense of a slower convergence.
+ +            */
+ +            double epsilon;
+ +
+ +            /**
+ +            * Stopping criterion iterations number used in the numerical scheme.
+ +            */
+ +            int iterations;
+ +
+ +            bool useInitialFlow;
+ +
+ +        private:
+ +            void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
+ +
+ +            std::vector<oclMat> I0s;
+ +            std::vector<oclMat> I1s;
+ +            std::vector<oclMat> u1s;
+ +            std::vector<oclMat> u2s;
+ +
+ +            oclMat I1x_buf;
+ +            oclMat I1y_buf;
+ +
+ +            oclMat I1w_buf;
+ +            oclMat I1wx_buf;
+ +            oclMat I1wy_buf;
+ +
+ +            oclMat grad_buf;
+ +            oclMat rho_c_buf;
+ +
+ +            oclMat p11_buf;
+ +            oclMat p12_buf;
+ +            oclMat p21_buf;
+ +            oclMat p22_buf;
+ +
+ +            oclMat diff_buf;
+ +            oclMat norm_buf;
+ +        };
+ +        // current supported sorting methods
+ +        enum
+ +        {
+ +            SORT_BITONIC,   // only support power-of-2 buffer size
+ +            SORT_SELECTION, // cannot sort duplicate keys
+ +            SORT_MERGE,
+ +            SORT_RADIX      // only support signed int/float keys(CV_32S/CV_32F)
+ +        };
+ +        //! Returns the sorted result of all the elements in input based on equivalent keys.
+ +        //
+ +        //  The element unit in the values to be sorted is determined from the data type,
+ +        //  i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
+ +        //  matrix dimension.
+ +        //  both keys and values will be sorted inplace
+ +        //  Key needs to be single channel oclMat.
+ +        //
+ +        //  Example:
+ +        //  input -
+ +        //    keys   = {2,    3,   1}   (CV_8UC1)
+ +        //    values = {10,5, 4,3, 6,2} (CV_8UC2)
+ +        //  sortByKey(keys, values, SORT_SELECTION, false);
+ +        //  output -
+ +        //    keys   = {1,    2,   3}   (CV_8UC1)
+ +        //    values = {6,2, 10,5, 4,3} (CV_8UC2)
+ +        CV_EXPORTS void sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
+ +        /*!Base class for MOG and MOG2!*/
+ +        class CV_EXPORTS BackgroundSubtractor
+ +        {
+ +        public:
+ +            //! the virtual destructor
+ +            virtual ~BackgroundSubtractor();
+ +            //! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
+ +            virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
+ +
+ +            //! computes a background image
+ +            virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
+ +        };
+ +                /*!
+ +        Gaussian Mixture-based Background/Foreground Segmentation Algorithm
+ +
+ +        The class implements the following algorithm:
+ +        "An improved adaptive background mixture model for real-time tracking with shadow detection"
+ +        P. KaewTraKulPong and R. Bowden,
+ +        Proc. 2nd European Workshop on Advanced Video-Based Surveillance Systems, 2001.
+ +        http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+ +        */
+ +        class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
+ +        {
+ +        public:
+ +            //! the default constructor
+ +            MOG(int nmixtures = -1);
+ +
+ +            //! re-initialization method
+ +            void initialize(Size frameSize, int frameType);
+ +
+ +            //! the update operator
+ +            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
+ +
+ +            //! computes a background image which is the mean of all background gaussians
+ +            void getBackgroundImage(oclMat& backgroundImage) const;
+ +
+ +            //! releases all inner buffers
+ +            void release();
+ +
+ +            int history;
+ +            float varThreshold;
+ +            float backgroundRatio;
+ +            float noiseSigma;
+ +
+ +        private:
+ +            int nmixtures_;
+ +
+ +            Size frameSize_;
+ +            int frameType_;
+ +            int nframes_;
+ +
+ +            oclMat weight_;
+ +            oclMat sortKey_;
+ +            oclMat mean_;
+ +            oclMat var_;
+ +        };
+ +
+ +        /*!
+ +        The class implements the following algorithm:
+ +        "Improved adaptive Gaussian mixture model for background subtraction"
+ +        Z.Zivkovic
+ +        International Conference Pattern Recognition, UK, August, 2004.
+ +        http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+ +        */
+ +        class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
+ +        {
+ +        public:
+ +            //! the default constructor
+ +            MOG2(int nmixtures = -1);
+ +
+ +            //! re-initialization method
+ +            void initialize(Size frameSize, int frameType);
+ +
+ +            //! the update operator
+ +            void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
+ +
+ +            //! computes a background image which are the mean of all background gaussians
+ +            void getBackgroundImage(oclMat& backgroundImage) const;
+ +
+ +            //! releases all inner buffers
+ +            void release();
+ +
+ +            // parameters
+ +            // you should call initialize after parameters changes
+ +
+ +            int history;
+ +
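+ +            //! NOTE: the following sentence presumably refers to nmixtures (see the constructor), not to varThreshold:
+ +            //! it is the maximum allowed number of mixture components; the actual number is determined dynamically per pixel.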
+ +            float varThreshold;
+ +            // threshold on the squared Mahalanobis distance to decide if it is well described
+ +            // by the background model or not. Related to Cthr from the paper.
+ +            // This does not influence the update of the background. A typical value could be 4 sigma
+ +            // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+ +
+ +            /////////////////////////
+ +            // less important parameters - things you might change but be careful
+ +            ////////////////////////
+ +
+ +            float backgroundRatio;
+ +            // corresponds to fTB=1-cf from the paper
+ +            // TB - threshold when the component becomes significant enough to be included into
+ +            // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.9.
+ +            // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+ +            // it is considered background
+ +            // float noiseSigma;
+ +            float varThresholdGen;
+ +
+ +            //corresponds to Tg - threshold on the squared Mahalan. dist. to decide
+ +            //when a sample is close to the existing components. If it is not close
+ +            //to any, a new component will be generated. I use 3 sigma => Tg=3*3=9.
+ +            //Smaller Tg leads to more generated components and higher Tg might
+ +            //lead to a small number of components but they can grow too large
+ +            float fVarInit;
+ +            float fVarMin;
+ +            float fVarMax;
+ +
+ +            //initial variance  for the newly generated components.
+ +            //It will influence the speed of adaptation. A good guess should be made.
+ +            //A simple way is to estimate the typical standard deviation from the images.
+ +            //I used here 10 as a reasonable value
+ +            // min and max can be used to further control the variance
+ +            float fCT; //CT - complexity reduction prior
+ +            //this is related to the number of samples needed to accept that a component
+ +            //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+ +            //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+ +
+ +            //shadow detection parameters
+ +            bool bShadowDetection; //default 1 - do shadow detection
+ +            unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+ +            float fTau;
+ +            // Tau - shadow threshold. The shadow is detected if the pixel is a darker
+ +            //version of the background. Tau is a threshold on how much darker the shadow can be.
+ +            //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+ +            //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+ +
+ +        private:
+ +            int nmixtures_;
+ +
+ +            Size frameSize_;
+ +            int frameType_;
+ +            int nframes_;
+ +
+ +            oclMat weight_;
+ +            oclMat variance_;
+ +            oclMat mean_;
+ +
+ +            oclMat bgmodelUsedModes_; //keep track of number of modes per pixel
+ +        };
+ +
+ +        /*!***************Kalman Filter*************!*/
+ +        class CV_EXPORTS KalmanFilter
+ +        {
+ +        public:
+ +            KalmanFilter();
+ +            //! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
+ +            KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
+ +            //! re-initializes Kalman filter. The previous content is destroyed.
+ +            void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
+ +
+ +            const oclMat& predict(const oclMat& control=oclMat());
+ +            const oclMat& correct(const oclMat& measurement);
+ +
+ +            oclMat statePre;           //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
+ +            oclMat statePost;          //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
+ +            oclMat transitionMatrix;   //!< state transition matrix (A)
+ +            oclMat controlMatrix;      //!< control matrix (B) (not used if there is no control)
+ +            oclMat measurementMatrix;  //!< measurement matrix (H)
+ +            oclMat processNoiseCov;    //!< process noise covariance matrix (Q)
+ +            oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
+ +            oclMat errorCovPre;        //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q)*/
+ +            oclMat gain;               //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
+ +            oclMat errorCovPost;       //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
+ +        private:
+ +            oclMat temp1;
+ +            oclMat temp2;
+ +            oclMat temp3;
+ +            oclMat temp4;
+ +            oclMat temp5;
+ +        };
+ +
+ +        /*!***************K Nearest Neighbour*************!*/
+ +        class CV_EXPORTS KNearestNeighbour: public CvKNearest
+ +        {
+ +        public:
+ +            KNearestNeighbour();
+ +            ~KNearestNeighbour();
+ +
+ +            bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
+ +                bool isRegression = false, int max_k = 32, bool updateBase = false);
+ +
+ +            void clear();
+ +
+ +            void find_nearest(const oclMat& samples, int k, oclMat& lables);
+ +
+ +        private:
+ +            oclMat samples_ocl;
+ +        };
+ +
+ +        /*!***************  SVM  *************!*/
+ +        class CV_EXPORTS CvSVM_OCL : public CvSVM
+ +        {
+ +        public:
+ +            CvSVM_OCL();
+ +
+ +            CvSVM_OCL(const cv::Mat& trainData, const cv::Mat& responses,
+ +                      const cv::Mat& varIdx=cv::Mat(), const cv::Mat& sampleIdx=cv::Mat(),
+ +                      CvSVMParams params=CvSVMParams());
+ +            CV_WRAP float predict( const int row_index, Mat& src, bool returnDFVal=false ) const;
+ +            CV_WRAP void predict( cv::InputArray samples, cv::OutputArray results ) const;
+ +            CV_WRAP float predict( const cv::Mat& sample, bool returnDFVal=false ) const;
+ +            float predict( const CvMat* samples, CV_OUT CvMat* results ) const;
+ +
+ +        protected:
+ +            float predict( const int row_index, int row_len, Mat& src, bool returnDFVal=false ) const;
+ +            void create_kernel();
+ +            void create_solver();
+ +        };
+ +
+ +        /*!***************  END  *************!*/
+ +    }
+ +}
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( push)
+ +#  pragma warning( disable: 4267)
+ +#endif
+ +#include "opencv2/ocl/matrix_operations.hpp"
+ +#if defined _MSC_VER && _MSC_VER >= 1200
+ +#  pragma warning( pop)
+ +#endif
+ +
+ +#endif /* __OPENCV_OCL_HPP__ */
diff --cc modules/ocl/src/color.cpp

index f71081d,e323934..408ba4c
--- 1/modules/ocl/src/color.cpp
--- 2/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@@ -56,27 -56,104 +56,104 @@@ static void fromRGB_caller(const oclMa
   {
       int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
       int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+     int pixels_per_work_item = 1;
   
-     String build_options = format("-D DEPTH_%d", src.depth());
+     if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
+     {
+         if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
+             pixels_per_work_item =  4;
+         else if (src.cols % 2 == 0)
+             pixels_per_work_item =  2;
+         else
+             pixels_per_work_item =  1;
+     }
+ 
- -    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
++    String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
       if (!additionalOptions.empty())
- -        build_options += additionalOptions;
- -
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ +        build_options = build_options + additionalOptions;
+ +
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
   
       if (!data1.empty())
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
+ +        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
       if (!data2.empty())
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));
+ +        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
   
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+     size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
+ #ifdef ANDROID
+     size_t lt[3] = { 16, 10, 1 };
+ #else
+     size_t lt[3] = { 16, 16, 1 };
+ #endif
+     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
+ }
+ 
+ static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                            const std::string & additionalOptions = std::string(),
+                            const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
+ {
+     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+ 
+     std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
+     if (!additionalOptions.empty())
+         build_options += additionalOptions;
+ 
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
++    std::vector<std::pair<size_t , const void *> > args;
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ 
+     if (!data1.empty())
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
+     if (!data2.empty())
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
+ 
+    size_t gt[3] = { dst.cols, dst.rows, 1 };
+ #ifdef ANDROID
+     size_t lt[3] = { 16, 10, 1 };
+ #else
+     size_t lt[3] = { 16, 16, 1 };
+ #endif
+     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
+ }
+ 
+ static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                          const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
+ {
+     std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
+     if (!additionalOptions.empty())
+         build_options += additionalOptions;
+ 
+     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+ 
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
++    std::vector<std::pair<size_t , const void *> > args;
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ 
+     if (!data.empty())
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
+ 
       size_t gt[3] = { dst.cols, dst.rows, 1 };
   #ifdef ANDROID
       size_t lt[3] = { 16, 10, 1 };
@@@ -89,26 -166,68 +166,68 @@@
   static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                            const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
   {
-     String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
+     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+     int pixels_per_work_item = 1;
+ 
+     if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
+     {
+         if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
+             pixels_per_work_item =  4;
+         else if (src.cols % 2 == 0)
+             pixels_per_work_item =  2;
+         else
+             pixels_per_work_item =  1;
+     }
+ 
+     std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
+     if (!additionalOptions.empty())
+         build_options += additionalOptions;
+ 
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
++    std::vector<std::pair<size_t , const void *> > args;
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ 
+     if (!data.empty())
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
+ 
+     size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
+ #ifdef ANDROID
+     size_t lt[3] = { 16, 10, 1 };
+ #else
+     size_t lt[3] = { 16, 16, 1 };
+ #endif
+     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
+ }
+ 
+ static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                          const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
+ {
- -    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
++    String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
       if (!additionalOptions.empty())
- -        build_options += additionalOptions;
+ +        build_options = build_options + additionalOptions;
   
       int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
       int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
   
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
   
       if (!data.empty())
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
+ +        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
   
       size_t gt[3] = {src.cols, src.rows, 1};
   #ifdef ANDROID
@@@ -119,22 -238,55 +238,55 @@@
       openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
   }
   
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                          const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
+ {
+     std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
+     if (!additionalOptions.empty())
+         build_options += additionalOptions;
+ 
+     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+ 
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
++    std::vector<std::pair<size_t , const void *> > args;
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
++    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
++    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ 
+     if (!data.empty())
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
+ 
+     size_t gt[3] = { dst.cols, dst.rows, 1 };
+ #ifdef ANDROID
+     size_t lt[3] = { 16, 10, 1 };
+ #else
+     size_t lt[3] = { 16, 16, 1 };
+ #endif
+     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
+ }
+ 
   static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
   {
-     String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
-                                   dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
       int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
       int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
   
- -    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
- -                                        src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
++    String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
++                                  src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
+ 
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
   
       size_t gt[3] = { dst.cols, dst.rows, 1 };
   #ifdef ANDROID
@@@ -147,21 -299,20 +299,20 @@@
   
   static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
   {
-     String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
-                                   src.depth(), greenbits, dst.channels());
- -    std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
- -                                       src.depth(), greenbits, dst.channels(), bidx);
++    String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
++                                  src.depth(), greenbits, dst.channels(), bidx);
       int src_offset = src.offset >> 1, src_step = src.step >> 1;
       int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
   
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
   
       size_t gt[3] = { dst.cols, dst.rows, 1 };
   #ifdef ANDROID
@@@ -174,21 -325,20 +325,20 @@@
   
   static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
   {
-     String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
-                                   src.depth(), greenbits, src.channels());
- -    std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
- -                                       src.depth(), greenbits, src.channels(), bidx);
++    String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
++                                  src.depth(), greenbits, src.channels(), bidx);
       int src_offset = (int)src.offset, src_step = (int)src.step;
       int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
   
- -    vector<pair<size_t , const void *> > args;
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
- -    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+ +    std::vector<std::pair<size_t , const void *> > args;
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
-     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
+ +    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
   
       size_t gt[3] = { dst.cols, dst.rows, 1 };
   #ifdef ANDROID
@@@ -267,18 -417,18 +417,18 @@@ static void cvtColor_caller(const oclMa
           fromRGB_caller(src, dst, bidx, "RGB2Gray");
           break;
       }
- -    case CV_GRAY2BGR: case CV_GRAY2BGRA:
+ +    case COLOR_GRAY2BGR: case COLOR_GRAY2BGRA:
       {
           CV_Assert(scn == 1);
- -        dcn  = code == CV_GRAY2BGRA ? 4 : 3;
+ +        dcn  = code == COLOR_GRAY2BGRA ? 4 : 3;
           dst.create(sz, CV_MAKETYPE(depth, dcn));
-         toRGB_caller(src, dst, 0, "Gray2RGB");
+         fromGray_caller(src, dst, 0, "Gray2RGB");
           break;
       }
- -    case CV_BGR2YUV: case CV_RGB2YUV:
+ +    case COLOR_BGR2YUV: case COLOR_RGB2YUV:
       {
           CV_Assert(scn == 3 || scn == 4);
- -        bidx = code == CV_BGR2YUV ? 0 : 2;
+ +        bidx = code == COLOR_BGR2YUV ? 0 : 2;
           dst.create(sz, CV_MAKETYPE(depth, 3));
           fromRGB_caller(src, dst, bidx, "RGB2YUV");
           break;
@@@ -303,13 -453,13 +453,13 @@@
   
           Size dstSz(sz.width, sz.height * 2 / 3);
           dst.create(dstSz, CV_MAKETYPE(depth, dcn));
-         toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
+         toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
           break;
       }
- -    case CV_BGR2YCrCb: case CV_RGB2YCrCb:
+ +    case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
       {
           CV_Assert(scn == 3 || scn == 4);
- -        bidx = code == CV_BGR2YCrCb ? 0 : 2;
+ +        bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
           dst.create(sz, CV_MAKETYPE(depth, 3));
           fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
           break;
@@@ -464,11 -614,11 +614,11 @@@
               return;
           }
   
-         fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
+         toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
           break;
       }
- -    case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
- -    case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
+ +    case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
+ +    case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
       {
           if (dcn <= 0)
               dcn = 3;
@@@ -483,10 -633,10 +633,10 @@@
           dst.create(sz, CV_MAKETYPE(depth, dcn));
   
           std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
-         toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
+         fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
           break;
       }
- -    case CV_RGBA2mRGBA: case CV_mRGBA2RGBA:
+ +    case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
           {
               CV_Assert(scn == 4 && depth == CV_8U);
               dst.create(sz, CV_MAKETYPE(depth, 4));
diff --cc modules/ocl/src/filtering.cpp

index 8832b30,35aa226..b6e1fff
--- 1/modules/ocl/src/filtering.cpp
--- 2/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@@ -741,6 -739,135 +741,135 @@@ void cv::ocl::filter2D(const oclMat &sr
       f->apply(src, dst);
   }
   
- -    string option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
+ const int optimizedSepFilterLocalSize = 16;
+ static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
+                                    const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
+ {
+     size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
+     size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
+ 
+     unsigned int src_pitch = src.step;
+     unsigned int dst_pitch = dst.step;
+ 
+     int src_offset_x = (src.offset % src.step) / src.elemSize();
+     int src_offset_y = src.offset / src.step;
+ 
+     std::vector<std::pair<size_t , const void *> > args;
+     args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&src.data ));
+     args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
+ 
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src_offset_x ));
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src_offset_y ));
+ 
+     args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&dst.data ));
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.offset ));
+     args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
+ 
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholecols ));
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholerows ));
+ 
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.cols ));
+     args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&dst.rows ));
+ 
++    String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
+         row_kernel.rows / 2, col_kernel.rows / 2 );
+ 
+     option += " -D KERNEL_MATRIX_X=";
+     for(int i=0; i<row_kernel.rows; i++)
+         option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
+     option += "0x0";
+ 
+     option += " -D KERNEL_MATRIX_Y=";
+     for(int i=0; i<col_kernel.rows; i++)
+         option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
+     option += "0x0";
+ 
+     switch(src.type())
+     {
+     case CV_8UC1:
+         option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
+         break;
+     case CV_32FC1:
+         option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
+         break;
+     case CV_8UC2:
+         option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
+         break;
+     case CV_32FC2:
+         option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
+         break;
+     case CV_8UC3:
+         option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
+         break;
+     case CV_32FC3:
+         option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
+         break;
+     case CV_8UC4:
+         option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
+         break;
+     case CV_32FC4:
+         option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
+         break;
+     default:
+         CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
+         break;
+     }
+     switch(dst.type())
+     {
+     case CV_8UC1:
+         option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
+         break;
+     case CV_8UC2:
+         option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
+         break;
+     case CV_8UC3:
+         option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
+         break;
+     case CV_8UC4:
+         option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
+         break;
+     case CV_32FC1:
+         option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
+         break;
+     case CV_32FC2:
+         option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
+         break;
+     case CV_32FC3:
+         option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
+         break;
+     case CV_32FC4:
+         option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
+         break;
+     default:
+         CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
+         break;
+     }
+     switch(bordertype)
+     {
+     case cv::BORDER_CONSTANT:
+         option += " -D BORDER_CONSTANT";
+         break;
+     case cv::BORDER_REPLICATE:
+         option += " -D BORDER_REPLICATE";
+         break;
+     case cv::BORDER_REFLECT:
+         option += " -D BORDER_REFLECT";
+         break;
+     case cv::BORDER_REFLECT101:
+         option += " -D BORDER_REFLECT_101";
+         break;
+     case cv::BORDER_WRAP:
+         option += " -D BORDER_WRAP";
+         break;
+     default:
+         CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
+         break;
+     }
+ 
+     openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
+         -1, -1, option.c_str() );
+ }
+ 
   ////////////////////////////////////////////////////////////////////////////////////////////////////
   // SeparableFilter
   
@@@ -787,9 -914,38 +916,38 @@@ public
   Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
           const Ptr<BaseColumnFilter_GPU> &columnFilter)
   {
- -    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
+ +    return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
   }
   
+ namespace
+ {
+ class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
+ {
+ public:
+     SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
+     {
+         bordertype = btype;
+         rowKernel = rowKernel_;
+         columnKernel = columnKernel_;
+     }
+ 
+     virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
+     {
+         normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
+ 
+         oclMat srcROI = src(roi);
+         oclMat dstROI = dst(roi);
+ 
+         sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
+     }
+ 
+     Mat rowKernel;
+     Mat columnKernel;
+     int bordertype;
+ };
+ }
+ 
+ 
   static void GPUFilterBox(const oclMat &src, oclMat &dst,
                            Size &ksize, const Point anchor, const int borderType)
   {
diff --cc modules/ocl/src/gftt.cpp

index b072865,a82196d..09cd2a1
--- 1/modules/ocl/src/gftt.cpp
--- 2/modules/ocl/src/gftt.cpp
+++ b/modules/ocl/src/gftt.cpp
@@@ -48,125 -48,68 +48,68 @@@
   using namespace cv;
   using namespace cv::ocl;
   
+ // currently sort procedure on the host is more efficient
   static bool use_cpu_sorter = true;
   
- namespace
+ // compact structure for corners
+ struct DefCorner
   {
- enum SortMethod
+     float eig;  //eigenvalue of corner
+     short x;    //x coordinate of corner point
+     short y;    //y coordinate of corner point
+ } ;
+ 
+ // compare procedure for corner
+ //it is used for sort on the host side
+ struct DefCornerCompare
   {
-     CPU_STL,
-     BITONIC,
-     SELECTION
- };
- 
- const int GROUP_SIZE = 256;
- 
- template<SortMethod method>
- struct Sorter
- {
-     //typedef EigType;
- };
- 
- //TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
- template<>
- struct Sorter<CPU_STL>
- {
-     typedef oclMat EigType;
-     static cv::Mutex cs;
-     static Mat mat_eig;
- 
-     //prototype
-     static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
-     {
-         float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
-         float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
-         return v1 > v2;
-     }
-     static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+     bool operator()(const DefCorner a, const DefCorner b) const
       {
-         cv::AutoLock lock(cs);
-         //temporarily use STL's sort function
-         Mat mat_corners = corners;
-         mat_eig = eig_tex;
-         std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
-         corners = mat_corners;
+         return a.eig > b.eig;
       }
   };
- cv::Mutex Sorter<CPU_STL>::cs;
- cv::Mat   Sorter<CPU_STL>::mat_eig;
   
- template<>
- struct Sorter<BITONIC>
+ // sort corner point using opencl bitonicosrt implementation
+ static void sortCorners_caller(oclMat& corners, const int count)
   {
-     typedef TextureCL EigType;
- 
-     static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+     Context * cxt = Context::getContext();
+     int     GS = count/2;
+     int     LS = min(255,GS);
+     size_t  globalThreads[3] = {GS, 1, 1};
+     size_t  localThreads[3]  = {LS, 1, 1};
+ 
+     // 2^numStages should be equal to count or the output is invalid
+     int numStages = 0;
+     for(int i = count; i > 1; i >>= 1)
       {
-         Context * cxt = Context::getContext();
-         size_t globalThreads[3] = {count / 2, 1, 1};
-         size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
- 
-         // 2^numStages should be equal to count or the output is invalid
-         int numStages = 0;
-         for(int i = count; i > 1; i >>= 1)
-         {
-             ++numStages;
-         }
-         const int argc = 5;
-         std::vector< std::pair<size_t, const void *> > args(argc);
-         String kernelname = "sortCorners_bitonicSort";
-         args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
-         args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
-         args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
-         for(int stage = 0; stage < numStages; ++stage)
-         {
-             args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
-             for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
-             {
-                 args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
-                 openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
-             }
-         }
+         ++numStages;
       }
- };
- 
- template<>
- struct Sorter<SELECTION>
- {
-     typedef TextureCL EigType;
- 
-     static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+     const int argc = 4;
+     std::vector< std::pair<size_t, const void *> > args(argc);
+     std::string kernelname = "sortCorners_bitonicSort";
+     args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
+     args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
+     for(int stage = 0; stage < numStages; ++stage)
       {
-         Context * cxt = Context::getContext();
- 
-         size_t globalThreads[3] = {count, 1, 1};
-         size_t localThreads[3]  = {GROUP_SIZE, 1, 1};
- 
-         std::vector< std::pair<size_t, const void *> > args;
-         //local
-         String kernelname = "sortCorners_selectionSortLocal";
-         int lds_size = GROUP_SIZE * sizeof(cl_float2);
-         args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
-         args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
-         args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
-         args.push_back( std::make_pair( lds_size,       (void*)NULL) );
- 
-         openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
- 
-         //final
-         kernelname = "sortCorners_selectionSortFinal";
-         args.pop_back();
-         openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
+         args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
+         for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
+         {
+             args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
+             openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
+         }
       }
- };
+ }
   
- int findCorners_caller(
-     const TextureCL& eig,
-     const float threshold,
-     const oclMat& mask,
-     oclMat& corners,
-     const int max_count)
+ // find corners on matrix and put it into array
+ static void findCorners_caller(
+     const oclMat&   eig_mat,        //input matrix worth eigenvalues
+     oclMat&         eigMinMax,      //input with min and max values of eigenvalues
+     const float     qualityLevel,
+     const oclMat&   mask,
+     oclMat&         corners,        //output array with detected corners
+     oclMat&         counter)        //output value with number of detected corners, have to be 0 before call
   {
- -    string  opt;
++    String  opt;
       std::vector<int> k;
       Context * cxt = Context::getContext();
   
@@@ -174,28 -117,73 +117,73 @@@
   
       const int mask_strip = mask.step / mask.elemSize1();
   
-     oclMat g_counter(1, 1, CV_32SC1);
-     g_counter.setTo(0);
- -    args.push_back(make_pair( sizeof(cl_mem),   (void*)&(eig_mat.data)));
++    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&(eig_mat.data)));
   
-     args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&eig  ));
+     int src_pitch = (int)eig_mat.step;
- -    args.push_back(make_pair( sizeof(cl_int),   (void*)&src_pitch ));
- -    args.push_back(make_pair( sizeof(cl_mem),   (void*)&mask.data ));
- -    args.push_back(make_pair( sizeof(cl_mem),   (void*)&corners.data ));
- -    args.push_back(make_pair( sizeof(cl_int),   (void*)&mask_strip));
- -    args.push_back(make_pair( sizeof(cl_mem),   (void*)&eigMinMax.data ));
- -    args.push_back(make_pair( sizeof(cl_float), (void*)&qualityLevel ));
- -    args.push_back(make_pair( sizeof(cl_int),   (void*)&eig_mat.rows ));
- -    args.push_back(make_pair( sizeof(cl_int),   (void*)&eig_mat.cols ));
- -    args.push_back(make_pair( sizeof(cl_int),   (void*)&corners.cols ));
- -    args.push_back(make_pair( sizeof(cl_mem),   (void*)&counter.data ));
++    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&src_pitch ));
+ +    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&mask.data ));
+ +    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&corners.data ));
+ +    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&mask_strip));
-     args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
-     args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
-     args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
-     args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
-     args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
- 
-     size_t globalThreads[3] = {eig.cols, eig.rows, 1};
++    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&eigMinMax.data ));
++    args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
++    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.rows ));
++    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.cols ));
++    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&corners.cols ));
++    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&counter.data ));
+ 
+     size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
       size_t localThreads[3]  = {16, 16, 1};
+     if(!mask.empty())
+         opt += " -D WITH_MASK=1";
   
-     const char * opt = mask.empty() ? "" : "-D WITH_MASK";
-     openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
-     return std::min(Mat(g_counter).at<int>(0), max_count);
+      openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
+ }
+ 
+ 
+ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
+ {
+     size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
+     CV_Assert(groupnum != 0);
+ 
+     int dbsize = groupnum * 2 * src.elemSize();
+ 
+     ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
+ 
+     cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
+ 
+     int all_cols = src.step / src.elemSize();
+     int pre_cols = (src.offset % src.step) / src.elemSize();
+     int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
+     int invalid_cols = pre_cols + sec_cols;
+     int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
+     int offset = src.offset / src.elemSize();
+ 
+     {// first parallel pass
- -        vector<pair<size_t , const void *> > args;
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_data ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&offset));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
++        std::vector<std::pair<size_t , const void *> > args;
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
+         size_t globalThreads[3] = {groupnum * 256, 1, 1};
+         size_t localThreads[3] = {256, 1, 1};
+         openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
+                             args, -1, -1, "-D T=float -D DEPTH_5");
+     }
+ 
+     {// run final "serial" kernel to find accumulate results from threads and reset corner counter
- -        vector<pair<size_t , const void *> > args;
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_data ));
- -        args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum ));
- -        args.push_back( make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
++        std::vector<std::pair<size_t , const void *> > args;
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
++        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
++        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
+         size_t globalThreads[3] = {1, 1, 1};
+         size_t localThreads[3] = {1, 1, 1};
+         openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
+                             args, -1, -1);
+     }
   }
- }//unnamed namespace
   
   void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
   {
@@@ -209,54 -197,83 +197,83 @@@
       else
           cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
   
-     double maxVal = 0;
-     minMax(eig_, NULL, &maxVal);
+     ensureSizeIsEnough(1,1, CV_32SC1, counter_);
+ 
+     // find max eigenvalue and reset detected counters
+     minMaxEig_caller(eig_,eig_minmax_,counter_);
   
-     ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+     // allocate buffer for kernels
+     int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
   
-     Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
-     int total = findCorners_caller(
-         *eig_tex,
-         static_cast<float>(maxVal * qualityLevel),
+     if(!use_cpu_sorter)
+     {   // round to 2^n
+         unsigned int n=1;
+         for(n=1;n<(unsigned int)corner_array_size;n<<=1);
+         corner_array_size = (int)n;
+ 
+         ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
+ 
+         // set to 0 to be able use bitonic sort on whole 2^n array
+         tmpCorners_.setTo(0);
+     }
+     else
+     {
+         ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
+     }
+ 
+     int total = tmpCorners_.cols; // by default the number of corner is full array
- -    vector<DefCorner>   tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
++    std::vector<DefCorner>   tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
+ 
+     //find points with high eigenvalue and put it into the output array
+     findCorners_caller(
+         eig_,
+         eig_minmax_,
+         static_cast<float>(qualityLevel),
           mask,
           tmpCorners_,
-         tmpCorners_.cols);
+         counter_);
+ 
+     if(!use_cpu_sorter)
+     {// sort detected corners on deivce side
+         sortCorners_caller(tmpCorners_, corner_array_size);
+     }
+     else
+     {// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
+         openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
+     }
+ 
+     //blocking read whole corners array (sorted or not sorted)
+     openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
   
       if (total == 0)
-     {
+     {// check for trivial case
           corners.release();
           return;
       }
+ 
       if(use_cpu_sorter)
-     {
-         Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
-     }
-     else
-     {
-         //if total is power of 2
-         if(((total - 1) & (total)) == 0)
-         {
-             Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
-         }
-         else
-         {
-             Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
-         }
+     {// sort detected corners on cpu side.
+         tmp.resize(total);
- -        cv::sort(tmp,DefCornerCompare());
++        std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
       }
   
- -    vector<Point2f> tmp2;
+     //estimate maximal size of final output array
+     int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
+     int D2 = (int)ceil(minDistance * minDistance);
+     // allocate output buffer
++    std::vector<Point2f> tmp2;
+     tmp2.reserve(total_max);
+ 
+ 
       if (minDistance < 1)
-     {
-         Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
-         tmpCorners_(roi_range).copyTo(corners);
+     {// we have not distance restriction. then just copy with conversion maximal allowed points into output array
+         for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
+         {
+             tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
+         }
       }
       else
-     {
-         std::vector<Point2f> tmp(total);
-         downloadPoints(tmpCorners_, tmp);
- 
-         std::vector<Point2f> tmp2;
-         tmp2.reserve(total);
- 
+     {// we have distance restriction. then start coping to output array from the first element and check distance for each next one
           const int cell_size = cvRound(minDistance);
           const int grid_width = (image.cols + cell_size - 1) / cell_size;
           const int grid_height = (image.rows + cell_size - 1) / cell_size;
@@@ -287,20 -307,18 +307,18 @@@
               {
                   for (int xx = x1; xx <= x2; xx++)
                   {
-                     std::vector<Point2f>& m = grid[yy * grid_width + xx];
- 
-                     if (!m.empty())
- -                    vector<Point2i>& m = grid[yy * grid_width + xx];
++                    std::vector<Point2i>& m = grid[yy * grid_width + xx];
+                     if (m.empty())
+                         continue;
+                     for(size_t j = 0; j < m.size(); j++)
                       {
-                         for(size_t j = 0; j < m.size(); j++)
+                         int dx = p.x - m[j].x;
+                         int dy = p.y - m[j].y;
+ 
+                         if (dx * dx + dy * dy < D2)
                           {
-                             float dx = p.x - m[j].x;
-                             float dy = p.y - m[j].y;
- 
-                             if (dx * dx + dy * dy < minDistance * minDistance)
-                             {
-                                 good = false;
-                                 goto break_out;
-                             }
+                             good = false;
+                             goto break_out_;
                           }
                       }
                   }
@@@ -319,10 -337,14 +337,14 @@@
               }
           }
   
-         corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
       }
+     int final_size = static_cast<int>(tmp2.size());
+     if(final_size>0)
+         corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
+     else
+         corners.release();
   }
- -void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
+ +void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
   {
       CV_DbgAssert(points.type() == CV_32FC2);
       points_v.resize(points.cols);
diff --cc modules/ocl/src/haar.cpp

index a023f8a,e334ad9..d38b3ba
--- 1/modules/ocl/src/haar.cpp
--- 2/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@@ -972,8 -970,39 +970,39 @@@ void OclCascadeClassifier::detectMultiS
               // init candiate global count by 0
               int pattern = 0;
               openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
-             // execute face detector
-             openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
+ 
+             if(WGNumTotal>WGNumSampled)
+             {// small images and each pixel is processed
+                 // setup global sizes to have linear array of workgroups with WGNum size
+                 int     pixelstep = 1;
+                 size_t  LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
+                 globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
+                 globalThreads[1] = LS[1];
+                 globalThreads[2] = 1;
- -                string options1 = options;
++                String options1 = options;
+                 options1 += format(" -D PIXEL_STEP=%d",pixelstep);
+                 options1 += format(" -D WGSTART=%d",WGNumSampled);
+                 options1 += format(" -D LSx=%d",LS[0]);
+                 options1 += format(" -D LSy=%d",LS[1]);
+                 // execute face detector
+                 openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
+             }
+             if(WGNumSampled>0)
+             {// large images each 4th pixel is processed
+                 // setup global sizes to have linear array of workgroups with WGNum size
+                 int     pixelstep = 2;
+                 size_t  LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
+                 globalThreads[0] = LS[0]*WGNumSampled;
+                 globalThreads[1] = LS[1];
+                 globalThreads[2] = 1;
- -                string options2 = options;
++                String options2 = options;
+                 options2 += format(" -D PIXEL_STEP=%d",pixelstep);
+                 options2 += format(" -D WGSTART=%d",0);
+                 options2 += format(" -D LSx=%d",LS[0]);
+                 options2 += format(" -D LSy=%d",LS[1]);
+                 // execute face detector
+                 openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
+             }
               //read candidate buffer back and put it into host list
               openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
               assert(candidate[0]<outputsz);
diff --cc modules/ocl/src/hog.cpp

index 70fe991,1f8afe5..3bdb382
--- 1/modules/ocl/src/hog.cpp
--- 2/modules/ocl/src/hog.cpp
+++ b/modules/ocl/src/hog.cpp
@@@ -1892,21 -1908,21 +1908,21 @@@ void cv::ocl::device::hog::compute_grad
       char correctGamma = (correct_gamma) ? 1 : 0;
       int img_step = img.step;
       int grad_quadstep = grad.step >> 3;
-     int qangle_step = qangle.step >> 1;
+     int qangle_step = qangle.step >> (1 + qangle_step_shift);
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&height));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_step));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale));
- -    args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
+ +    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
+ +
+ +    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
           localThreads, args, -1, -1);
   }
   
@@@ -1927,20 -1943,20 +1943,20 @@@ void cv::ocl::device::hog::compute_grad
       char correctGamma = (correct_gamma) ? 1 : 0;
       int img_step = img.step >> 2;
       int grad_quadstep = grad.step >> 3;
-     int qangle_step = qangle.step >> 1;
+     int qangle_step = qangle.step >> (1 + qangle_step_shift);
   
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&height));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&width));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&img_step));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
- -    args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data));
- -    args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale));
- -    args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
- -    args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
- -
- -    openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_step));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&grad_quadstep));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&qangle_step));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&grad.data));
+ +    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&qangle.data));
+ +    args.push_back( std::make_pair( sizeof(cl_float), (void *)&angle_scale));
+ +    args.push_back( std::make_pair( sizeof(cl_char), (void *)&correctGamma));
+ +    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cnbins));
+ +
+ +    openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads,
           localThreads, args, -1, -1);
   }
diff --cc modules/ocl/src/imgproc.cpp

index f730df1,3ce7ba6..0ac6271
--- 1/modules/ocl/src/imgproc.cpp
--- 2/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@@ -1035,62 -1033,104 +1035,104 @@@ namespace c
               else
                   scale = 1. / scale;
   
-             if (ksize > 0)
+             const int sobel_lsz = 16;
+             if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
+                 (ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
+                 src.wholerows > sobel_lsz + (ksize>>1) &&
+                 src.wholecols > sobel_lsz + (ksize>>1))
               {
-                 Context* clCxt = Context::getContext();
-                 if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
-                     src.cols % 8 == 0 && src.rows % 8 == 0 &&
-                     ksize==3 &&
-                     (borderType ==cv::BORDER_REFLECT ||
-                      borderType == cv::BORDER_REPLICATE ||
-                      borderType ==cv::BORDER_REFLECT101 ||
-                      borderType ==cv::BORDER_WRAP))
+                 Dx.create(src.size(), CV_32FC1);
+                 Dy.create(src.size(), CV_32FC1);
+ 
+                 CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
+ 
+                 size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
+                 size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
+ 
+                 unsigned int src_pitch = src.step;
+                 unsigned int Dx_pitch = Dx.step;
+                 unsigned int Dy_pitch = Dy.step;
+ 
+                 int src_offset_x = (src.offset % src.step) / src.elemSize();
+                 int src_offset_y = src.offset / src.step;
+ 
+                 float _scale = scale;
+ 
+                 std::vector<std::pair<size_t , const void *> > args;
+                 args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&src.data ));
+                 args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
+ 
+                 args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
+                 args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
+ 
+                 args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&Dx.data ));
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.offset ));
+                 args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
+                 args.push_back( std::make_pair( sizeof(cl_mem)  , (void *)&Dy.data ));
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dy.offset ));
+                 args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
+ 
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholecols ));
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&src.wholerows ));
+ 
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.cols ));
+                 args.push_back( std::make_pair( sizeof(cl_int)  , (void *)&Dx.rows ));
+ 
+                 args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
+ 
- -                string option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
++                String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
+                 switch(src.type())
                   {
-                     Dx.create(src.size(), CV_32FC1);
-                     Dy.create(src.size(), CV_32FC1);
- 
-                     const unsigned int block_x = 8;
-                     const unsigned int block_y = 8;
- 
-                     unsigned int src_pitch = src.step;
-                     unsigned int dst_pitch = Dx.cols;
- 
-                     float _scale = scale;
- 
-                     std::vector<std::pair<size_t , const void *> > args;
-                     args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
-                     args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
-                     args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
-                     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
-                     args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
-                     args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
-                     args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
-                     args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
-                     size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
- 
-                     String option = "-D BLK_X=8 -D BLK_Y=8";
-                     switch(borderType)
-                     {
-                     case cv::BORDER_REPLICATE:
-                         option += " -D BORDER_REPLICATE";
-                         break;
-                     case cv::BORDER_REFLECT:
-                         option += " -D BORDER_REFLECT";
-                         break;
-                     case cv::BORDER_REFLECT101:
-                         option += " -D BORDER_REFLECT101";
-                         break;
-                     case cv::BORDER_WRAP:
-                         option += " -D BORDER_WRAP";
-                         break;
-                     }
-                     openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
+                 case CV_8UC1:
+                     option += " -D SRCTYPE=uchar";
+                     break;
+                 case CV_32FC1:
+                     option += " -D SRCTYPE=float";
+                     break;
                   }
-                 else
+                 switch(borderType)
                   {
-                     Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
-                     Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
+                 case cv::BORDER_CONSTANT:
+                     option += " -D BORDER_CONSTANT";
+                     break;
+                 case cv::BORDER_REPLICATE:
+                     option += " -D BORDER_REPLICATE";
+                     break;
+                 case cv::BORDER_REFLECT:
+                     option += " -D BORDER_REFLECT";
+                     break;
+                 case cv::BORDER_REFLECT101:
+                     option += " -D BORDER_REFLECT_101";
+                     break;
+                 case cv::BORDER_WRAP:
+                     option += " -D BORDER_WRAP";
+                     break;
+                 default:
+                     CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
+                     break;
                   }
- -                string kernel_name;
+ 
++                String kernel_name;
+                 switch(ksize)
+                 {
+                 case -1:
+                     option += " -D SCHARR";
+                     kernel_name = "sobel3";
+                     break;
+                 case 3:
+                     kernel_name = "sobel3";
+                     break;
+                 case 5:
+                     kernel_name = "sobel5";
+                     break;
+                 case 7:
+                     kernel_name = "sobel7";
+                     break;
+                 default:
+                     CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
+                     break;
+                 }
+                 openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
               }
               else
               {
diff --cc modules/ts/src/ts_func.cpp
Simple merge
diff --cc samples/cpp/intelperc_capture.cpp

index 0000000,b81a278..40349e0

mode 000000,100644..100644
--- /dev/null
--- 2/samples/cpp/intelperc_capture.cpp
+++ b/samples/cpp/intelperc_capture.cpp
@@@ -1,0 -1,376 +1,376 @@@
- -    size_t profilesCount = (size_t)capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_COUNT);
+ // intelperc_capture.cpp : Defines the entry point for the console application.
+ //
+ 
+ #include "opencv2/highgui/highgui.hpp"
+ 
+ #include <iostream>
+ 
+ using namespace cv;
+ using namespace std;
+ 
+ static bool g_printStreamSetting        = false; // set by -ps / --print-streams
+ static int g_imageStreamProfileIdx      = -1; // set by -isp / --image-stream-prof; -1 means no image profile was requested
+ static int g_depthStreamProfileIdx      = -1; // set by -dsp / --depth-stream-prof; -1 means no depth profile was requested
+ static bool g_irStreamShow              = false; // set by -ir
+ static double g_imageBrightness         = -DBL_MAX; // set by -imb; main() applies it only when it lies inside (-10000, 10000)
+ static double g_imageContrast           = -DBL_MAX; // set by -imc; main() applies it only when it lies inside (0, 10000)
+ static bool g_printTiming               = false; // set by -pts
+ static bool g_showClosedPoint           = false; // set by --show-closed
+ 
+ 
+ static int g_closedDepthPoint[2]; // filled by minMaxIdx() in main() (its 4th argument); the imshow* helpers use [0] as row and [1] as column
+ 
+ static void printUsage(const char *arg0)
+ {
+     const char *filename = arg0;
+     while (*filename)
+         filename++;
+     while ((arg0 <= filename) && ('\\' != *filename) && ('/' != *filename))
+         filename--;
+     filename++;
+ 
+     cout << "This program demonstrates usage of camera supported\nby Intel Perceptual computing SDK." << endl << endl;
+     cout << "usage: " << filename << "[-ps] [-isp IDX] [-dsp IDX]\n [-ir] [-imb VAL] [-imc VAL]" << endl << endl;
+     cout << "   -ps,            print streams setting and profiles" << endl;
+     cout << "   -isp IDX,       set profile index of the image stream" << endl;
+     cout << "   -dsp IDX,       set profile index of the depth stream" << endl;
+     cout << "   -ir,            show data from IR stream" << endl;
+     cout << "   -imb VAL,       set brighness value for a image stream" << endl;
+     cout << "   -imc VAL,       set contrast value for a image stream" << endl;
+     cout << "   -pts,           print frame index and frame time" << endl;
+     cout << "   --show-closed,  print frame index and frame time" << endl;
+     cout <<  endl;
+ }
+ 
+ static void parseCMDLine(int argc, char* argv[])
+ {
+     if( argc == 1 )
+     {
+         printUsage(argv[0]);
+     }
+     else
+     {
+         for( int i = 1; i < argc; i++ )
+         {
+             if ((0 == strcmp(argv[i], "--help")) || (0 == strcmp( argv[i], "-h")))
+             {
+                 printUsage(argv[0]);
+                 exit(0);
+             }
+             else if ((0 == strcmp( argv[i], "--print-streams")) || (0 == strcmp( argv[i], "-ps")))
+             {
+                 g_printStreamSetting = true;
+             }
+             else if ((0 == strcmp( argv[i], "--image-stream-prof")) || (0 == strcmp( argv[i], "-isp")))
+             {
+                 g_imageStreamProfileIdx = atoi(argv[++i]);
+             }
+             else if ((0 == strcmp( argv[i], "--depth-stream-prof")) || (0 == strcmp( argv[i], "-dsp")))
+             {
+                 g_depthStreamProfileIdx = atoi(argv[++i]);
+             }
+             else if (0 == strcmp( argv[i], "-ir"))
+             {
+                 g_irStreamShow = true;
+             }
+             else if (0 == strcmp( argv[i], "-imb"))
+             {
+                 g_imageBrightness = atof(argv[++i]);
+             }
+             else if (0 == strcmp( argv[i], "-imc"))
+             {
+                 g_imageContrast = atof(argv[++i]);
+             }
+             else if (0 == strcmp(argv[i], "-pts"))
+             {
+                 g_printTiming = true;
+             }
+             else if (0 == strcmp(argv[i], "--show-closed"))
+             {
+                 g_showClosedPoint = true;
+             }
+             else
+             {
+                 cout << "Unsupported command line argument: " << argv[i] << "." << endl;
+                 exit(-1);
+             }
+         }
+         if (g_showClosedPoint && (-1 == g_depthStreamProfileIdx))
+         {
+             cerr << "For --show-closed depth profile has be selected" << endl;
+             exit(-1);
+         }
+     }
+ }
+ 
+ static void printStreamProperties(VideoCapture &capture) // Prints the image and depth stream settings and every profile of each stream to cout.
+ {
- -    cout << "  Brightness = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_BRIGHTNESS) << endl;
- -    cout << "  Contrast = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_CONTRAST) << endl;
- -    cout << "  Saturation = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_SATURATION) << endl;
- -    cout << "  Hue = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_HUE) << endl;
- -    cout << "  Gamma = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_GAMMA) << endl;
- -    cout << "  Sharpness = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_SHARPNESS) << endl;
- -    cout << "  Gain = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_GAIN) << endl;
- -    cout << "  Backligh = " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_BACKLIGHT) << endl;
++    size_t profilesCount = (size_t)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT); // how many image-stream profiles the loop below enumerates
+     cout << "Image stream." << endl;
- -        capture.set(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
++    cout << "  Brightness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS) << endl;
++    cout << "  Contrast = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST) << endl;
++    cout << "  Saturation = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SATURATION) << endl;
++    cout << "  Hue = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_HUE) << endl;
++    cout << "  Gamma = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAMMA) << endl;
++    cout << "  Sharpness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SHARPNESS) << endl;
++    cout << "  Gain = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAIN) << endl;
++    cout << "  Backligh = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BACKLIGHT) << endl;
+     cout << "Image streams profiles:" << endl;
+     for (size_t i = 0; i < profilesCount; i++)
+     {
- -            (int)capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_FRAME_WIDTH);
++        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i); // presumably makes the reads below report profile i - confirm against the backend; the earlier index is not restored in this function
+         cout << "  Profile[" << i << "]: ";
+         cout << "width = " <<
- -            (int)capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_FRAME_HEIGHT);
++            (int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_WIDTH);
+         cout << ", height = " <<
- -            capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_FPS);
++            (int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_HEIGHT);
+         cout << ", fps = " <<
- -    profilesCount = (size_t)capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_COUNT);
++            capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FPS);
+         cout << endl;
+     }
+ 
- -    cout << "  Low confidence value = " << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE) << endl;
- -    cout << "  Saturation value = " << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE) << endl;
- -    cout << "  Confidence threshold = " << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD) << endl;
- -    cout << "  Focal length = (" << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ) << ", "
- -        << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT) << ")" << endl;
++    profilesCount = (size_t)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT); // same variable reused for the depth-stream profile count
+     cout << "Depth stream." << endl;
- -        capture.set(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
++    cout << "  Low confidence value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE) << endl;
++    cout << "  Saturation value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE) << endl;
++    cout << "  Confidence threshold = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD) << endl;
++    cout << "  Focal length = (" << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ) << ", "
++        << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT) << ")" << endl;
+     cout << "Depth streams profiles:" << endl;
+     for (size_t i = 0; i < profilesCount; i++)
+     {
- -            (int)capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_FRAME_WIDTH);
++        capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i); // same enumeration as above, for the depth generator
+         cout << "  Profile[" << i << "]: ";
+         cout << "width = " <<
- -            (int)capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_FRAME_HEIGHT);
++            (int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_WIDTH);
+         cout << ", height = " <<
- -            capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_FPS);
++            (int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_HEIGHT);
+         cout << ", fps = " <<
- -        if (capture.retrieve(uvMap, CV_CAP_INTELPERC_UVDEPTH_MAP))
++            capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FPS);
+         cout << endl;
+     }
+ }
+ 
+ static void imshowImage(const char *winname, Mat &image, VideoCapture &capture) // Shows image in window winname; with --show-closed it first paints a red mark for g_closedDepthPoint into image.
+ {
+     if (g_showClosedPoint)
+     {
+         Mat uvMap;
- -    short lowValue = (short)capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE);
- -    short saturationValue = (short)capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE);
++        if (capture.retrieve(uvMap, CAP_INTELPERC_UVDEPTH_MAP)) // the mark is skipped when no UV map can be retrieved
+         {
+             float *uvmap = (float *)uvMap.ptr() + 2 * (g_closedDepthPoint[0] * uvMap.cols + g_closedDepthPoint[1]); // two floats per entry at (row [0], col [1]); assumes a continuous 2-channel float map - TODO confirm
+             int x = (int)((*uvmap) * image.cols); uvmap++; // first float scaled by the image width -> x; then step to the second float
+             int y = (int)((*uvmap) * image.rows); // second float scaled by the image height -> y
+ 
+             if ((0 <= x) && (0 <= y)) // negative coordinates are not drawn; presumably they mean "no mapping" - confirm
+             {
+                 static const int pointSize = 4; // side length of the mark in pixels
+                 for (int row = y; row < min(y + pointSize, image.rows); row++) // clipped at the bottom edge
+                 {
+                     uchar* ptrDst = image.ptr(row) + x * 3 + 2;//+2 -> Red; x * 3 assumes 3 bytes per pixel (8-bit BGR) - TODO confirm
+                     for (int col = 0; col < min(pointSize, image.cols - x); col++, ptrDst+=3) // clipped at the right edge; runs zero times when x >= image.cols
+                     {
+                         *ptrDst = 255;
+                     }
+                 }
+             }
+         }
+     }
+     imshow(winname, image);
+ }
+ static void imshowIR(const char *winname, Mat &ir) // Converts the IR frame to an 8-bit image and shows it in window winname; with --show-closed the image is 3-channel and carries a red mark at g_closedDepthPoint.
+ {
+     Mat image;
+     if (g_showClosedPoint)
+     {
+         image.create(ir.rows, ir.cols, CV_8UC3); // 3 channels so the mark can be drawn in red
+         for (int row = 0; row < ir.rows; row++)
+         {
+             uchar* ptrDst = image.ptr(row);
+             short* ptrSrc = (short*)ir.ptr(row); // treats ir as 16-bit samples - assumes a 16-bit single-channel frame, TODO confirm
+             for (int col = 0; col < ir.cols; col++, ptrSrc++)
+             {
+                 uchar val = (uchar) ((*ptrSrc) >> 2); // drops the two low bits, then truncates to 8 bits; presumably the samples are 10-bit - confirm
+                 *ptrDst = val;  ptrDst++; // the same value goes to all three channels (grey)
+                 *ptrDst = val;  ptrDst++;
+                 *ptrDst = val;  ptrDst++;
+             }
+         }
+ 
+         static const int pointSize = 4; // side length of the mark in pixels
+         for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++) // [0] is the row; clipped at the bottom edge
+         {
+             uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
+             for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3) // [1] is the column; clipped at the right edge
+             {
+                 *ptrDst = 255;
+             }
+         }
+     }
+     else
+     {
+         image.create(ir.rows, ir.cols, CV_8UC1); // no mark requested: plain single-channel output
+         for (int row = 0; row < ir.rows; row++)
+         {
+             uchar* ptrDst = image.ptr(row);
+             short* ptrSrc = (short*)ir.ptr(row);
+             for (int col = 0; col < ir.cols; col++, ptrSrc++, ptrDst++)
+             {
+                 *ptrDst = (uchar) ((*ptrSrc) >> 2); // same 16-bit -> 8-bit conversion as in the branch above
+             }
+         }
+     }
+ 
+     imshow(winname, image);
+ }
+ static void imshowDepth(const char *winname, Mat &depth, VideoCapture &capture) // Converts the depth frame to an 8-bit image (low-confidence and saturated samples become 0) and shows it in window winname; with --show-closed it also marks g_closedDepthPoint in red.
+ {
- -    capture.open(CV_CAP_INTELPERC);
++    short lowValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE); // sample value compared against below and drawn as 0
++    short saturationValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE); // second sample value drawn as 0
+ 
+     Mat image;
+     if (g_showClosedPoint)
+     {
+         image.create(depth.rows, depth.cols, CV_8UC3); // 3 channels so the mark can be drawn in red
+         for (int row = 0; row < depth.rows; row++)
+         {
+             uchar* ptrDst = image.ptr(row);
+             short* ptrSrc = (short*)depth.ptr(row); // treats depth as 16-bit samples - assumes a 16-bit single-channel frame, TODO confirm
+             for (int col = 0; col < depth.cols; col++, ptrSrc++)
+             {
+                 if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc))) // marker values: paint the pixel black
+                 {
+                     *ptrDst = 0; ptrDst++;
+                     *ptrDst = 0; ptrDst++;
+                     *ptrDst = 0; ptrDst++;
+                 }
+                 else
+                 {
+                     uchar val = (uchar) ((*ptrSrc) >> 2); // drops the two low bits, then truncates to 8 bits
+                     *ptrDst = val;  ptrDst++; // the same value goes to all three channels (grey)
+                     *ptrDst = val;  ptrDst++;
+                     *ptrDst = val;  ptrDst++;
+                 }
+             }
+         }
+ 
+         static const int pointSize = 4; // side length of the mark in pixels
+         for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++) // [0] is the row; clipped at the bottom edge
+         {
+             uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
+             for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3) // [1] is the column; clipped at the right edge
+             {
+                 *ptrDst = 255;
+             }
+         }
+     }
+     else
+     {
+         image.create(depth.rows, depth.cols, CV_8UC1); // no mark requested: plain single-channel output
+         for (int row = 0; row < depth.rows; row++)
+         {
+             uchar* ptrDst = image.ptr(row);
+             short* ptrSrc = (short*)depth.ptr(row);
+             for (int col = 0; col < depth.cols; col++, ptrSrc++, ptrDst++)
+             {
+                 if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc))) // same masking as in the branch above
+                     *ptrDst = 0;
+                 else
+                     *ptrDst = (uchar) ((*ptrSrc) >> 2);
+             }
+         }
+     }
+     imshow(winname, image);
+ }
+ 
+ int main(int argc, char* argv[])
+ {
+     parseCMDLine(argc, argv);
+ 
+     VideoCapture capture;
- -        if (!capture.set(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_imageStreamProfileIdx))
++    capture.open(CAP_INTELPERC);
+     if (!capture.isOpened())
+     {
+         cerr << "Can not open a capture object." << endl;
+         return -1;
+     }
+ 
+     if (g_printStreamSetting)
+         printStreamProperties(capture);
+ 
+     if (-1 != g_imageStreamProfileIdx)
+     {
- -        if (!capture.set(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_depthStreamProfileIdx))
++        if (!capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_imageStreamProfileIdx))
+         {
+             cerr << "Can not setup a image stream." << endl;
+             return -1;
+         }
+     }
+     if (-1 != g_depthStreamProfileIdx)
+     {
- -        if (!capture.set(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0.0))
++        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_depthStreamProfileIdx))
+         {
+             cerr << "Can not setup a depth stream." << endl;
+             return -1;
+         }
+     }
+     else if (g_irStreamShow)
+     {
- -        capture.set(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_BRIGHTNESS, g_imageBrightness);
++        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, 0.0))
+         {
+             cerr << "Can not setup a IR stream." << endl;
+             return -1;
+         }
+     }
+     else
+     {
+         cout << "Streams not selected" << endl;
+         return 0;
+     }
+ 
+     //Setup additional properies only after set profile of the stream
+     if ( (-10000.0 < g_imageBrightness) && (g_imageBrightness < 10000.0))
- -        capture.set(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_BRIGHTNESS, g_imageContrast);
++        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageBrightness);
+     if ( (0 < g_imageContrast) && (g_imageContrast < 10000.0))
- -        if ((-1 != g_depthStreamProfileIdx) && (capture.retrieve(depthImage, CV_CAP_INTELPERC_DEPTH_MAP)))
++        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageContrast);
+ 
+     int frame = 0;
+     for(;;frame++)
+     {
+         Mat bgrImage;
+         Mat depthImage;
+         Mat irImage;
+ 
+         if (!capture.grab())
+         {
+             cout << "Can not grab images." << endl;
+             return -1;
+         }
+ 
- -        if ((g_irStreamShow) && (capture.retrieve(irImage, CV_CAP_INTELPERC_IR_MAP)))
++        if ((-1 != g_depthStreamProfileIdx) && (capture.retrieve(depthImage, CAP_INTELPERC_DEPTH_MAP)))
+         {
+             if (g_showClosedPoint)
+             {
+                 double minVal = 0.0; double maxVal = 0.0;
+                 minMaxIdx(depthImage, &minVal, &maxVal, g_closedDepthPoint);
+             }
+             imshowDepth("depth image", depthImage, capture);
+         }
- -        if ((-1 != g_imageStreamProfileIdx) && (capture.retrieve(bgrImage, CV_CAP_INTELPERC_IMAGE)))
++        if ((g_irStreamShow) && (capture.retrieve(irImage, CAP_INTELPERC_IR_MAP)))
+             imshowIR("ir image", irImage);
- -            cout << "Image frame: " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_POS_FRAMES)
- -                 << ", Depth(IR) frame: " << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_POS_FRAMES) << endl;
- -            cout << "Image frame: " << capture.get(CV_CAP_INTELPERC_IMAGE_GENERATOR | CV_CAP_PROP_POS_MSEC)
- -                 << ", Depth(IR) frame: " << capture.get(CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_POS_MSEC) << endl;
++        if ((-1 != g_imageStreamProfileIdx) && (capture.retrieve(bgrImage, CAP_INTELPERC_IMAGE)))
+             imshowImage("color image", bgrImage, capture);
+ 
+         if (g_printTiming)
+         {
- -}
++            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_FRAMES)
++                 << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_FRAMES) << endl;
++            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_MSEC)
++                 << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_MSEC) << endl;
+         }
+         if( waitKey(30) >= 0 )
+             break;
+     }
+ 
+     return 0;
++}
diff --cc samples/ocl/facedetect.cpp

index fd570b5,3781059..c505932
--- 1/samples/ocl/facedetect.cpp
--- 2/samples/ocl/facedetect.cpp
+++ b/samples/ocl/facedetect.cpp
@@@ -14,7 -11,10 +14,10 @@@
   
   using namespace std;
   using namespace cv;
- -#define LOOP_NUM 10
+ 
+ +#define LOOP_NUM 1
+ #define MAX_THREADS 10
+ 
   
   ///////////////////////////single-threading faces detecting///////////////////////////////
   
@@@ -110,21 -110,16 +112,20 @@@ static int facedetect_one_thread(bool u
               if( frame.empty() )
                   break;
               if( iplImg->origin == IPL_ORIGIN_TL )
- -                frame.copyTo( frameCopy );
+ +                frame.copyTo( frameCopy0 );
               else
- -                flip( frame, frameCopy, 0 );
+ +                flip( frame, frameCopy0, 0 );
+ +            if( scale == 1)
+ +                frameCopy0.copyTo(frameCopy);
+ +            else
+ +                resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
   
-             work_end = 0;
               if(useCPU)
- -                detectCPU(frameCopy, faces, cpu_cascade, scale, false);
+ +                detectCPU(frameCopy, faces, cpu_cascade, 1);
               else
- -                detect(frameCopy, faces, cascade, scale, false);
+ +                detect(frameCopy, faces, cascade, 1);
   
- -            Draw(frameCopy, faces, scale);
+ +            Draw(frameCopy, faces, 1);
               if( waitKey( 10 ) >= 0 )
                   break;
           }
@@@ -136,19 -131,18 +137,19 @@@
           vector<Rect> faces;
           vector<Rect> ref_rst;
           double accuracy = 0.;
-         work_end = 0;
+ +        detectCPU(image, ref_rst, cpu_cascade, scale);
- -        for(int i = 0; i <= LOOP_NUM; i++)
+ +
+         cout << "loops: ";
+ +        for(int i = 0; i <= LOOP_NUM; i ++)
           {
-             cout << "loop" << i << endl;
+             cout << i << ", ";
               if(useCPU)
- -                detectCPU(image, faces, cpu_cascade, scale, i!=0);
+ +                detectCPU(image, faces, cpu_cascade, scale);
               else
               {
- -                detect(image, faces, cascade, scale, i!=0);
+ +                detect(image, faces, cascade, scale);
                   if(i == 0)
                   {
- -                    detectCPU(image, ref_rst, cpu_cascade, scale, false);
                       accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
                   }
               }
@@@ -196,7 -187,18 +194,18 @@@ static void detectFaces(std::string fil
       d_img.upload(img);
   
       std::vector<Rect> oclfaces;
-     cascade.detectMultiScale(d_img, oclfaces,  1.1, 3, 0 | CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
+     std::thread::id tid = std::this_thread::get_id();
+     std::cout << '[' << threadNum << "] "
+         << "ThreadID = " << tid
+         << ", CommandQueue = " << *(void**)ocl::getClCommandQueuePtr()
+         << endl;
+     for(int i = 0; i <= LOOP_NUM; i++)
+     {
+         if(i>0) workBegin(threadNum);
- -        cascade.detectMultiScale(d_img, oclfaces,  1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30), Size(0, 0));
++        cascade.detectMultiScale(d_img, oclfaces,  1.1, 3, 0|CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
+         if(i>0) workEnd(threadNum);
+     }
+     std::cout << '[' << threadNum << "] " << "Average time = " << getTotalTime(threadNum) / LOOP_NUM << " ms" << endl;
   
       for(unsigned int i = 0; i<oclfaces.size(); i++)
           rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
@@@ -231,18 -236,17 +243,17 @@@ int main( int argc, const char** argv 
   {
   
       const char* keys =
- -        "{ h | help       | false       | print help message }"
- -        "{ i | input      |             | specify input image }"
- -        "{ t | template   | haarcascade_frontalface_alt.xml |"
+ +        "{ h help       | false       | print help message }"
+ +        "{ i input      |             | specify input image }"
+ +        "{ t template   | haarcascade_frontalface_alt.xml |"
           " specify template file path }"
- -        "{ c | scale      |   1.0       | scale image }"
- -        "{ s | use_cpu    | false       | use cpu or gpu to process the image }"
- -        "{ o | output     | | specify output image save path(only works when input is images) }"
- -        "{ n | thread_num |      1      | set number of threads >= 1 }";
+ +        "{ c scale      |   1.0       | scale image }"
+ +        "{ s use_cpu    | false       | use cpu or gpu to process the image }"
-         "{ o output     | facedetect_output.jpg  |"
-         " specify output image save path(only works when input is images) }"
++        "{ o output     | | specify output image save path(only works when input is images) }"
+ +        "{ n thread_num |      1      | set number of threads >= 1 }";
   
       CommandLineParser cmd(argc, argv, keys);
- -    if (cmd.get<bool>("help"))
+ +    if (cmd.has("help"))
       {
           cout << "Usage : facedetect [options]" << endl;
           cout << "Available options:" << endl;
@@@ -324,8 -327,8 +333,8 @@@ void Draw(Mat& img, vector<Rect>& faces
           radius = cvRound((r->width + r->height)*0.25*scale);
           circle( img, center, radius, color, 3, 8, 0 );
       }
-     //imwrite( outputName, img );
-     if(abs(scale-1.0)>.001)
- -    if( !outputName.empty() ) imwrite( outputName, img );
++    //if( !outputName.empty() ) imwrite( outputName, img );
+     if( abs(scale-1.0)>.001 )
       {
           resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
       }
author	Roman Donchenko <roman.donchenko@itseez.com>
	Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)
committer	Roman Donchenko <roman.donchenko@itseez.com>
	Mon, 23 Dec 2013 14:50:17 +0000 (18:50 +0400)
		1	2
CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/OpenCVCompilerOptions.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/OpenCVFindLibsVideo.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/templates/cvconfig.h.in	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/doc/operations_on_arrays.rst	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/src/dxt.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/core/src/matrix.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/include/opencv2/highgui.hpp	patch \|	diff1 \|	\|	blob \| history
modules/highgui/include/opencv2/highgui/highgui_c.h	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/src/cap.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/src/precomp.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/highgui/test/test_precomp.hpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/imgproc/src/imgwarp.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/java/generator/gen_java.py	patch \|	diff1 \|	diff2 \|	blob \| history
modules/nonfree/src/surf.ocl.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/doc/image_filtering.rst	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/include/opencv2/ocl.hpp	patch \|	diff1 \|	\|	blob \| history
modules/ocl/src/color.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/filtering.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/gftt.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/haar.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/hog.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ocl/src/imgproc.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
modules/ts/src/ts_func.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
samples/cpp/intelperc_capture.cpp	patch \|	\|	diff2 \|	blob \| history
samples/ocl/facedetect.cpp	patch \|	diff1 \|	diff2 \|	blob \| history