modules/gpu/src/imgproc.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "precomp.hpp"\r
  44 \r
  45 using namespace cv;\r
  46 using namespace cv::gpu;\r
  47 \r
  48 #if !defined (HAVE_CUDA)\r
  49 \r
  50 void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&){ throw_nogpu(); }\r
  51 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  52 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  53 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  54 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  55 void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
  56 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  57 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  58 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  59 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, double, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  60 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  61 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  62 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
  63 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  64 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  65 void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  66 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  67 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
  68 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }\r
  69 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_nogpu(); }\r
  70 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  71 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  72 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  73 void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  74 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }\r
  75 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  76 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }\r
  77 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_nogpu(); }\r
  78 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  79 void cv::gpu::calcHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  80 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  81 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  82 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  83 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  84 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  85 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  86 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  87 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }\r
  88 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }\r
  89 void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }\r
  90 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
  91 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
  92 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
  93 void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  94 void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  95 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  96 void cv::gpu::PyrDownBuf::create(Size, int) { throw_nogpu(); }\r
  97 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogpu(); }\r
  98 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  99 void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); }\r
 100 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); }\r
 101 void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
 102 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
 103 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 104 void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 105 cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }\r
 106 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }\r
 107 void cv::gpu::CannyBuf::release() { throw_nogpu(); }\r
 108 \r
 109 #else /* !defined (HAVE_CUDA) */\r
 110 \r
 111 namespace cv { namespace gpu {  namespace imgproc\r
 112 {\r
 113     void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 114     void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 115 \r
 116     extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
 117     extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
 118 \r
 119     void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 120     void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 121 \r
 122     void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 123     void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 124 }}}\r
 125 \r
 126 ////////////////////////////////////////////////////////////////////////\r
 127 // remap\r
 128 \r
 129 void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)\r
 130 {\r
 131     typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 132     static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};\r
 133 \r
 134     CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);\r
 135 \r
 136     dst.create(xmap.size(), src.type());\r
 137 \r
 138     callers[src.channels() - 1](src, xmap, ymap, dst);\r
 139 }\r
 140 \r
 141 ////////////////////////////////////////////////////////////////////////\r
 142 // meanShiftFiltering_GPU\r
 143 \r
 144 void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)\r
 145 {\r
 146     if( src.empty() )\r
 147         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 148 \r
 149     if( src.depth() != CV_8U || src.channels() != 4 )\r
 150         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 151 \r
 152     dst.create( src.size(), CV_8UC4 );\r
 153 \r
 154     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 155         criteria.maxCount = 5;\r
 156 \r
 157     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 158 \r
 159     float eps;\r
 160     if( !(criteria.type & TermCriteria::EPS) )\r
 161         eps = 1.f;\r
 162     eps = (float)std::max(criteria.epsilon, 0.0);\r
 163 \r
 164     imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);\r
 165 }\r
 166 \r
 167 ////////////////////////////////////////////////////////////////////////\r
 168 // meanShiftProc_GPU\r
 169 \r
 170 void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)\r
 171 {\r
 172     if( src.empty() )\r
 173         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 174 \r
 175     if( src.depth() != CV_8U || src.channels() != 4 )\r
 176         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 177 \r
 178     dstr.create( src.size(), CV_8UC4 );\r
 179     dstsp.create( src.size(), CV_16SC2 );\r
 180 \r
 181     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 182         criteria.maxCount = 5;\r
 183 \r
 184     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 185 \r
 186     float eps;\r
 187     if( !(criteria.type & TermCriteria::EPS) )\r
 188         eps = 1.f;\r
 189     eps = (float)std::max(criteria.epsilon, 0.0);\r
 190 \r
 191     imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);\r
 192 }\r
 193 \r
 194 ////////////////////////////////////////////////////////////////////////\r
 195 // drawColorDisp\r
 196 \r
 197 namespace\r
 198 {\r
 199     template <typename T>\r
 200     void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)\r
 201     {\r
 202         dst.create(src.size(), CV_8UC4);\r
 203 \r
 204         imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);\r
 205     }\r
 206 \r
 207     typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);\r
 208 \r
 209     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};\r
 210 }\r
 211 \r
 212 void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)\r
 213 {\r
 214     CV_Assert(src.type() == CV_8U || src.type() == CV_16S);\r
 215 \r
 216     drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));\r
 217 }\r
 218 \r
 219 ////////////////////////////////////////////////////////////////////////\r
 220 // reprojectImageTo3D\r
 221 \r
 222 namespace\r
 223 {\r
 224     template <typename T>\r
 225     void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)\r
 226     {\r
 227         xyzw.create(disp.rows, disp.cols, CV_32FC4);\r
 228         imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
 229     }\r
 230 \r
 231     typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);\r
 232 \r
 233     const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};\r
 234 }\r
 235 \r
 236 void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream)\r
 237 {\r
 238     CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);\r
 239 \r
 240     reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));\r
 241 }\r
 242 \r
 243 ////////////////////////////////////////////////////////////////////////\r
 244 // resize\r
 245 \r
 246 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
 247 {\r
 248     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
 249 \r
 250     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 251     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
 252 \r
 253     CV_Assert( src.size().area() > 0 );\r
 254     CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
 255 \r
 256     if( dsize == Size() )\r
 257     {\r
 258         dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));\r
 259     }\r
 260     else\r
 261     {\r
 262         fx = (double)dsize.width / src.cols;\r
 263         fy = (double)dsize.height / src.rows;\r
 264     }\r
 265 \r
 266     dst.create(dsize, src.type());\r
 267 \r
 268     NppiSize srcsz;\r
 269     srcsz.width  = src.cols;\r
 270     srcsz.height = src.rows;\r
 271     NppiRect srcrect;\r
 272     srcrect.x = srcrect.y = 0;\r
 273     srcrect.width  = src.cols;\r
 274     srcrect.height = src.rows;\r
 275     NppiSize dstsz;\r
 276     dstsz.width  = dst.cols;\r
 277     dstsz.height = dst.rows;\r
 278 \r
 279     cudaStream_t stream = StreamAccessor::getStream(s);\r
 280 \r
 281     NppStreamHandler h(stream);\r
 282 \r
 283     if (src.type() == CV_8UC1)\r
 284     {\r
 285         nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 286             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 287     }\r
 288     else\r
 289     {\r
 290         nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 291             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 292     }\r
 293 \r
 294     if (stream == 0)\r
 295         cudaSafeCall( cudaDeviceSynchronize() );\r
 296 }\r
 297 \r
 298 ////////////////////////////////////////////////////////////////////////\r
 299 // copyMakeBorder\r
 300 \r
 301 void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)\r
 302 {\r
 303     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
 304 \r
 305     dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
 306 \r
 307     NppiSize srcsz;\r
 308     srcsz.width  = src.cols;\r
 309     srcsz.height = src.rows;\r
 310     NppiSize dstsz;\r
 311     dstsz.width  = dst.cols;\r
 312     dstsz.height = dst.rows;\r
 313 \r
 314     cudaStream_t stream = StreamAccessor::getStream(s);\r
 315 \r
 316     NppStreamHandler h(stream);\r
 317 \r
 318     switch (src.type())\r
 319     {\r
 320     case CV_8UC1:\r
 321         {\r
 322             Npp8u nVal = static_cast<Npp8u>(value[0]);\r
 323             nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 324                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 325             break;\r
 326         }\r
 327     case CV_8UC4:\r
 328         {\r
 329             Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};\r
 330             nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 331                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 332             break;\r
 333         }\r
 334     case CV_32SC1:\r
 335         {\r
 336             Npp32s nVal = static_cast<Npp32s>(value[0]);\r
 337             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 338                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 339             break;\r
 340         }\r
 341     case CV_32FC1:\r
 342         {\r
 343             Npp32f val = static_cast<Npp32f>(value[0]);\r
 344             Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
 345             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 346                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 347             break;\r
 348         }\r
 349     default:\r
 350         CV_Assert(!"Unsupported source type");\r
 351     }\r
 352 \r
 353     if (stream == 0)\r
 354         cudaSafeCall( cudaDeviceSynchronize() );\r
 355 }\r
 356 \r
 357 ////////////////////////////////////////////////////////////////////////\r
 358 // warp\r
 359 \r
 360 namespace\r
 361 {\r
 362     typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,\r
 363                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 364                                        int interpolation);\r
 365     typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,\r
 366                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 367                                        int interpolation);\r
 368     typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,\r
 369                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 370                                        int interpolation);\r
 371     typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,\r
 372                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 373                                        int interpolation);\r
 374 \r
 375     void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,\r
 376                        npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],\r
 377                        npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)\r
 378     {\r
 379         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 380 \r
 381         int interpolation = flags & INTER_MAX;\r
 382 \r
 383         CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);\r
 384         CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 385 \r
 386         dst.create(dsize, src.type());\r
 387 \r
 388         NppiSize srcsz;\r
 389         srcsz.height = src.rows;\r
 390         srcsz.width = src.cols;\r
 391         NppiRect srcroi;\r
 392         srcroi.x = srcroi.y = 0;\r
 393         srcroi.height = src.rows;\r
 394         srcroi.width = src.cols;\r
 395         NppiRect dstroi;\r
 396         dstroi.x = dstroi.y = 0;\r
 397         dstroi.height = dst.rows;\r
 398         dstroi.width = dst.cols;\r
 399 \r
 400         int warpInd = (flags & WARP_INVERSE_MAP) >> 4;\r
 401 \r
 402         NppStreamHandler h(stream);\r
 403 \r
 404         switch (src.depth())\r
 405         {\r
 406         case CV_8U:\r
 407             nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 408                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 409             break;\r
 410         case CV_16U:\r
 411             nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 412                 dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 413             break;\r
 414         case CV_32S:\r
 415             nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, static_cast<int>(src.step), srcroi,\r
 416                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 417             break;\r
 418         case CV_32F:\r
 419             nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, static_cast<int>(src.step), srcroi,\r
 420                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 421             break;\r
 422         default:\r
 423             CV_Assert(!"Unsupported source type");\r
 424         }\r
 425 \r
 426         if (stream == 0)\r
 427             cudaSafeCall( cudaDeviceSynchronize() );\r
 428     }\r
 429 }\r
 430 \r
 431 void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 432 {\r
 433     static npp_warp_8u_t npp_warpAffine_8u[][2] =\r
 434         {\r
 435             {0, 0},\r
 436             {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},\r
 437             {0, 0},\r
 438             {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},\r
 439             {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}\r
 440         };\r
 441     static npp_warp_16u_t npp_warpAffine_16u[][2] =\r
 442         {\r
 443             {0, 0},\r
 444             {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},\r
 445             {0, 0},\r
 446             {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},\r
 447             {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}\r
 448         };\r
 449     static npp_warp_32s_t npp_warpAffine_32s[][2] =\r
 450         {\r
 451             {0, 0},\r
 452             {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},\r
 453             {0, 0},\r
 454             {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},\r
 455             {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}\r
 456         };\r
 457     static npp_warp_32f_t npp_warpAffine_32f[][2] =\r
 458         {\r
 459             {0, 0},\r
 460             {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},\r
 461             {0, 0},\r
 462             {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},\r
 463             {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}\r
 464         };\r
 465 \r
 466     CV_Assert(M.rows == 2 && M.cols == 3);\r
 467 \r
 468     double coeffs[2][3];\r
 469     Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);\r
 470     M.convertTo(coeffsMat, coeffsMat.type());\r
 471 \r
 472     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));\r
 473 }\r
 474 \r
 475 void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 476 {\r
 477     static npp_warp_8u_t npp_warpPerspective_8u[][2] =\r
 478         {\r
 479             {0, 0},\r
 480             {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},\r
 481             {0, 0},\r
 482             {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},\r
 483             {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}\r
 484         };\r
 485     static npp_warp_16u_t npp_warpPerspective_16u[][2] =\r
 486         {\r
 487             {0, 0},\r
 488             {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},\r
 489             {0, 0},\r
 490             {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},\r
 491             {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}\r
 492         };\r
 493     static npp_warp_32s_t npp_warpPerspective_32s[][2] =\r
 494         {\r
 495             {0, 0},\r
 496             {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},\r
 497             {0, 0},\r
 498             {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},\r
 499             {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}\r
 500         };\r
 501     static npp_warp_32f_t npp_warpPerspective_32f[][2] =\r
 502         {\r
 503             {0, 0},\r
 504             {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},\r
 505             {0, 0},\r
 506             {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},\r
 507             {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}\r
 508         };\r
 509 \r
 510     CV_Assert(M.rows == 3 && M.cols == 3);\r
 511 \r
 512     double coeffs[3][3];\r
 513     Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);\r
 514     M.convertTo(coeffsMat, coeffsMat.type());\r
 515 \r
 516     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));\r
 517 }\r
 518 \r
 519 //////////////////////////////////////////////////////////////////////////////\r
 520 // buildWarpPlaneMaps\r
 521 \r
 522 namespace cv { namespace gpu { namespace imgproc\r
 523 {\r
 524     void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 525                             const float r[9], const float rinv[9], float f, float s, float dist,\r
 526                             float half_w, float half_h, cudaStream_t stream);\r
 527 }}}\r
 528 \r
 529 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 530                                  double dist, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 531 {\r
 532     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 533     Mat Rinv = R.inv();\r
 534     CV_Assert(Rinv.isContinuous());\r
 535 \r
 536     map_x.create(dst_roi.size(), CV_32F);\r
 537     map_y.create(dst_roi.size(), CV_32F);\r
 538     imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 539                                 static_cast<float>(f), static_cast<float>(s), static_cast<float>(dist), \r
 540                                 0.5f*src_size.width, 0.5f*src_size.height, StreamAccessor::getStream(stream));\r
 541 }\r
 542 \r
 543 //////////////////////////////////////////////////////////////////////////////\r
 544 // buildWarpCylyndricalMaps\r
 545 \r
 546 namespace cv { namespace gpu { namespace imgproc\r
 547 {\r
 548     void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 549                                   const float r[9], const float rinv[9], float f, float s,\r
 550                                   float half_w, float half_h, cudaStream_t stream);\r
 551 }}}\r
 552 \r
 553 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 554                                        GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 555 {\r
 556     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 557     Mat Rinv = R.inv();\r
 558     CV_Assert(Rinv.isContinuous());\r
 559 \r
 560     map_x.create(dst_roi.size(), CV_32F);\r
 561     map_y.create(dst_roi.size(), CV_32F);\r
 562     imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 563                                       static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 564                                       StreamAccessor::getStream(stream));\r
 565 }\r
 566 \r
 567 \r
 568 //////////////////////////////////////////////////////////////////////////////\r
 569 // buildWarpSphericalMaps\r
 570 \r
 571 namespace cv { namespace gpu { namespace imgproc\r
 572 {\r
 573     void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 574                                 const float r[9], const float rinv[9], float f, float s,\r
 575                                 float half_w, float half_h, cudaStream_t stream);\r
 576 }}}\r
 577 \r
 578 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 579                                      GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 580 {\r
 581     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 582     Mat Rinv = R.inv();\r
 583     CV_Assert(Rinv.isContinuous());\r
 584 \r
 585     map_x.create(dst_roi.size(), CV_32F);\r
 586     map_y.create(dst_roi.size(), CV_32F);\r
 587     imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 588                                     static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 589                                     StreamAccessor::getStream(stream));\r
 590 }\r
 591 \r
 592 ////////////////////////////////////////////////////////////////////////\r
 593 // rotate\r
 594 \r
 595 void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)\r
 596 {\r
 597     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 598 \r
 599     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 600     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 601 \r
 602     dst.create(dsize, src.type());\r
 603 \r
 604     NppiSize srcsz;\r
 605     srcsz.height = src.rows;\r
 606     srcsz.width = src.cols;\r
 607     NppiRect srcroi;\r
 608     srcroi.x = srcroi.y = 0;\r
 609     srcroi.height = src.rows;\r
 610     srcroi.width = src.cols;\r
 611     NppiRect dstroi;\r
 612     dstroi.x = dstroi.y = 0;\r
 613     dstroi.height = dst.rows;\r
 614     dstroi.width = dst.cols;\r
 615 \r
 616     cudaStream_t stream = StreamAccessor::getStream(s);\r
 617 \r
 618     NppStreamHandler h(stream);\r
 619 \r
 620     if (src.type() == CV_8UC1)\r
 621     {\r
 622         nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 623             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 624     }\r
 625     else\r
 626     {\r
 627         nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 628             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 629     }\r
 630 \r
 631     if (stream == 0)\r
 632         cudaSafeCall( cudaDeviceSynchronize() );\r
 633 }\r
 634 \r
 635 ////////////////////////////////////////////////////////////////////////\r
 636 // integral\r
 637 \r
 638 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)\r
 639 {\r
 640     GpuMat buffer;\r
 641     integralBuffered(src, sum, buffer, s);\r
 642 }\r
 643 \r
 644 void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)\r
 645 {\r
 646     CV_Assert(src.type() == CV_8UC1);\r
 647 \r
 648     sum.create(src.rows + 1, src.cols + 1, CV_32S);\r
 649     \r
 650     NcvSize32u roiSize;\r
 651     roiSize.width = src.cols;\r
 652     roiSize.height = src.rows;\r
 653 \r
 654         cudaDeviceProp prop;\r
 655         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 656 \r
 657     Ncv32u bufSize;\r
 658     nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
 659     ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);\r
 660 \r
 661     cudaStream_t stream = StreamAccessor::getStream(s);\r
 662 \r
 663     NppStStreamHandler h(stream);\r
 664 \r
 665     nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step), \r
 666         sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );\r
 667 \r
 668     if (stream == 0)\r
 669         cudaSafeCall( cudaDeviceSynchronize() );\r
 670 }\r
 671 \r
 672 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)\r
 673 {\r
 674     CV_Assert(src.type() == CV_8UC1);\r
 675 \r
 676     int width = src.cols + 1, height = src.rows + 1;\r
 677 \r
 678     sum.create(height, width, CV_32S);\r
 679     sqsum.create(height, width, CV_32F);\r
 680 \r
 681     NppiSize sz;\r
 682     sz.width = src.cols;\r
 683     sz.height = src.rows;\r
 684 \r
 685     cudaStream_t stream = StreamAccessor::getStream(s);\r
 686 \r
 687     NppStreamHandler h(stream);\r
 688 \r
 689     nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step), \r
 690         sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );\r
 691 \r
 692     if (stream == 0)\r
 693         cudaSafeCall( cudaDeviceSynchronize() );\r
 694 }\r
 695 \r
 696 //////////////////////////////////////////////////////////////////////////////\r
 697 // sqrIntegral\r
 698 \r
 699 void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)\r
 700 {\r
 701     CV_Assert(src.type() == CV_8U);\r
 702 \r
 703     NcvSize32u roiSize;\r
 704     roiSize.width = src.cols;\r
 705     roiSize.height = src.rows;\r
 706 \r
 707         cudaDeviceProp prop;\r
 708         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 709 \r
 710     Ncv32u bufSize;\r
 711     nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));       \r
 712     GpuMat buf(1, bufSize, CV_8U);\r
 713 \r
 714     cudaStream_t stream = StreamAccessor::getStream(s);\r
 715 \r
 716     NppStStreamHandler h(stream);\r
 717 \r
 718     sqsum.create(src.rows + 1, src.cols + 1, CV_64F);\r
 719     nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step), \r
 720             sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));\r
 721 \r
 722     if (stream == 0)\r
 723         cudaSafeCall( cudaDeviceSynchronize() );\r
 724 }\r
 725 \r
 726 //////////////////////////////////////////////////////////////////////////////\r
 727 // columnSum\r
 728 \r
 729 namespace cv { namespace gpu { namespace imgproc\r
 730 {\r
 731     void columnSum_32F(const DevMem2D src, const DevMem2D dst);\r
 732 }}}\r
 733 \r
 734 void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)\r
 735 {\r
 736     CV_Assert(src.type() == CV_32F);\r
 737 \r
 738     dst.create(src.size(), CV_32F);\r
 739     imgproc::columnSum_32F(src, dst);\r
 740 }\r
 741 \r
 742 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)\r
 743 {\r
 744     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);\r
 745 \r
 746     dst.create(src.size(), CV_32FC1);\r
 747 \r
 748     NppiSize sz;\r
 749     sz.width = src.cols;\r
 750     sz.height = src.rows;\r
 751 \r
 752     NppiRect nppRect;\r
 753     nppRect.height = rect.height;\r
 754     nppRect.width = rect.width;\r
 755     nppRect.x = rect.x;\r
 756     nppRect.y = rect.y;\r
 757 \r
 758     cudaStream_t stream = StreamAccessor::getStream(s);\r
 759 \r
 760     NppStreamHandler h(stream);\r
 761 \r
 762     nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),\r
 763                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );\r
 764 \r
 765     if (stream == 0)\r
 766         cudaSafeCall( cudaDeviceSynchronize() );\r
 767 }\r
 768 \r
 769 \r
 770 ////////////////////////////////////////////////////////////////////////\r
 771 // Histogram\r
 772 \r
 773 namespace\r
 774 {\r
 775     template<int n> struct NPPTypeTraits;\r
 776     template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };\r
 777     template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
 778     template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
 779     template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
 780 \r
 781     typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);\r
 782     typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);\r
 783 \r
 784     template<int SDEPTH> struct NppHistogramEvenFuncC1\r
 785     {\r
 786         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 787 \r
 788         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,\r
 789                     int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);\r
 790     };\r
 791     template<int SDEPTH> struct NppHistogramEvenFuncC4\r
 792     {\r
 793         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 794 \r
 795         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,\r
 796             Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);\r
 797     };\r
 798 \r
 799     template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 800     struct NppHistogramEvenC1\r
 801     {\r
 802         typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;\r
 803 \r
 804         static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)\r
 805         {\r
 806             int levels = histSize + 1;\r
 807             hist.create(1, histSize, CV_32S);\r
 808 \r
 809             NppiSize sz;\r
 810             sz.width = src.cols;\r
 811             sz.height = src.rows;\r
 812 \r
 813             int buf_size;\r
 814             get_buf_size(sz, levels, &buf_size);\r
 815 \r
 816             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 817 \r
 818             NppStreamHandler h(stream);\r
 819 \r
 820             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,\r
 821                 lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 822 \r
 823             if (stream == 0)\r
 824                 cudaSafeCall( cudaDeviceSynchronize() );\r
 825         }\r
 826     };\r
 827     template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 828     struct NppHistogramEvenC4\r
 829     {\r
 830         typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;\r
 831 \r
 832         static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)\r
 833         {\r
 834             int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};\r
 835             hist[0].create(1, histSize[0], CV_32S);\r
 836             hist[1].create(1, histSize[1], CV_32S);\r
 837             hist[2].create(1, histSize[2], CV_32S);\r
 838             hist[3].create(1, histSize[3], CV_32S);\r
 839 \r
 840             NppiSize sz;\r
 841             sz.width = src.cols;\r
 842             sz.height = src.rows;\r
 843 \r
 844             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 845 \r
 846             int buf_size;\r
 847             get_buf_size(sz, levels, &buf_size);\r
 848 \r
 849             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 850 \r
 851             NppStreamHandler h(stream);\r
 852 \r
 853             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 854 \r
 855             if (stream == 0)\r
 856                 cudaSafeCall( cudaDeviceSynchronize() );\r
 857         }\r
 858     };\r
 859 \r
 860     template<int SDEPTH> struct NppHistogramRangeFuncC1\r
 861     {\r
 862         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 863         typedef Npp32s level_t;\r
 864         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 865 \r
 866         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 867             const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);\r
 868     };\r
 869     template<> struct NppHistogramRangeFuncC1<CV_32F>\r
 870     {\r
 871         typedef Npp32f src_t;\r
 872         typedef Npp32f level_t;\r
 873         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 874 \r
 875         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 876             const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);\r
 877     };\r
 878     template<int SDEPTH> struct NppHistogramRangeFuncC4\r
 879     {\r
 880         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 881         typedef Npp32s level_t;\r
 882         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 883 \r
 884         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 885             const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 886     };\r
 887     template<> struct NppHistogramRangeFuncC4<CV_32F>\r
 888     {\r
 889         typedef Npp32f src_t;\r
 890         typedef Npp32f level_t;\r
 891         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 892 \r
 893         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 894             const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 895     };\r
 896 \r
 897     template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 898     struct NppHistogramRangeC1\r
 899     {\r
 900         typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;\r
 901         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 902         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 903 \r
 904         static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream)\r
 905         {\r
 906             CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);\r
 907 \r
 908             hist.create(1, levels.cols - 1, CV_32S);\r
 909 \r
 910             NppiSize sz;\r
 911             sz.width = src.cols;\r
 912             sz.height = src.rows;\r
 913 \r
 914             int buf_size;\r
 915             get_buf_size(sz, levels.cols, &buf_size);\r
 916             \r
 917             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 918 \r
 919             NppStreamHandler h(stream);\r
 920 \r
 921             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );\r
 922 \r
 923             if (stream == 0)\r
 924                 cudaSafeCall( cudaDeviceSynchronize() );\r
 925         }\r
 926     };\r
 927     template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 928     struct NppHistogramRangeC4\r
 929     {\r
 930         typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;\r
 931         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 932         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 933 \r
 934         static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream)\r
 935         {\r
 936             CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);\r
 937             CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);\r
 938             CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1);\r
 939             CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1);\r
 940 \r
 941             hist[0].create(1, levels[0].cols - 1, CV_32S);\r
 942             hist[1].create(1, levels[1].cols - 1, CV_32S);\r
 943             hist[2].create(1, levels[2].cols - 1, CV_32S);\r
 944             hist[3].create(1, levels[3].cols - 1, CV_32S);\r
 945 \r
 946             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 947             int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};\r
 948             const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};\r
 949 \r
 950             NppiSize sz;\r
 951             sz.width = src.cols;\r
 952             sz.height = src.rows;\r
 953 \r
 954             int buf_size;\r
 955             get_buf_size(sz, nLevels, &buf_size);\r
 956 \r
 957             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 958 \r
 959             NppStreamHandler h(stream);\r
 960 \r
 961             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );\r
 962 \r
 963             if (stream == 0)\r
 964                 cudaSafeCall( cudaDeviceSynchronize() );\r
 965         }\r
 966     };\r
 967 }\r
 968 \r
 969 void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)\r
 970 {\r
 971     Mat host_levels(1, nLevels, CV_32SC1);\r
 972     nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );\r
 973     levels.upload(host_levels);\r
 974 }\r
 975 \r
 976 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
 977 {\r
 978     GpuMat buf;\r
 979     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
 980 }\r
 981 \r
 982 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
 983 {\r
 984     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );\r
 985 \r
 986     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);\r
 987     static const hist_t hist_callers[] =\r
 988     {\r
 989         NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,\r
 990         0,\r
 991         NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,\r
 992         NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist\r
 993     };\r
 994 \r
 995     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
 996 }\r
 997 \r
 998 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
 999 {\r
1000     GpuMat buf;\r
1001     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
1002 }\r
1003 \r
1004 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1005 {\r
1006     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );\r
1007 \r
1008     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);\r
1009     static const hist_t hist_callers[] =\r
1010     {\r
1011         NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,\r
1012         0,\r
1013         NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,\r
1014         NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist\r
1015     };\r
1016 \r
1017     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1018 }\r
1019 \r
1020 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)\r
1021 {\r
1022     GpuMat buf;\r
1023     histRange(src, hist, levels, buf, stream);\r
1024 }\r
1025 \r
1026 \r
1027 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)\r
1028 {\r
1029     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);\r
1030 \r
1031     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream);\r
1032     static const hist_t hist_callers[] =\r
1033     {\r
1034         NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,\r
1035         0,\r
1036         NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,\r
1037         NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,\r
1038         0,\r
1039         NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist\r
1040     };\r
1041 \r
1042     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1043 }\r
1044 \r
1045 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)\r
1046 {\r
1047     GpuMat buf;\r
1048     histRange(src, hist, levels, buf, stream);\r
1049 }\r
1050 \r
1051 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream)\r
1052 {\r
1053     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);\r
1054 \r
1055     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream);\r
1056     static const hist_t hist_callers[] =\r
1057     {\r
1058         NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,\r
1059         0,\r
1060         NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,\r
1061         NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,\r
1062         0,\r
1063         NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist\r
1064     };\r
1065 \r
1066     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1067 }\r
1068 \r
1069 namespace cv { namespace gpu { namespace histograms\r
1070 {\r
1071     void histogram256_gpu(DevMem2D src, int* hist, unsigned int* buf, cudaStream_t stream);\r
1072 \r
1073     const int PARTIAL_HISTOGRAM256_COUNT = 240;\r
1074     const int HISTOGRAM256_BIN_COUNT     = 256;\r
1075 }}}\r
1076 \r
1077 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)\r
1078 {\r
1079     GpuMat buf;\r
1080     calcHist(src, hist, buf, stream);\r
1081 }\r
1082 \r
1083 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)\r
1084 {\r
1085     using namespace cv::gpu::histograms;\r
1086 \r
1087     CV_Assert(src.type() == CV_8UC1);\r
1088 \r
1089     hist.create(1, 256, CV_32SC1);\r
1090 \r
1091     ensureSizeIsEnough(1, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT, CV_32SC1, buf);\r
1092 \r
1093     histogram256_gpu(src, hist.ptr<int>(), buf.ptr<unsigned int>(), StreamAccessor::getStream(stream));\r
1094 }\r
1095 \r
1096 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1097 {\r
1098     GpuMat hist;\r
1099     GpuMat buf;\r
1100     equalizeHist(src, dst, hist, buf, stream);\r
1101 }\r
1102 \r
1103 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream)\r
1104 {\r
1105     GpuMat buf;\r
1106     equalizeHist(src, dst, hist, buf, stream);\r
1107 }\r
1108 \r
1109 namespace cv { namespace gpu { namespace histograms\r
1110 {\r
1111     void equalizeHist_gpu(DevMem2D src, DevMem2D dst, const int* lut, cudaStream_t stream);\r
1112 }}}\r
1113 \r
1114 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)\r
1115 {\r
1116     using namespace cv::gpu::histograms;\r
1117 \r
1118     CV_Assert(src.type() == CV_8UC1);\r
1119 \r
1120     dst.create(src.size(), src.type());\r
1121 \r
1122     int intBufSize;\r
1123     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );\r
1124 \r
1125     int bufSize = static_cast<int>(std::max(256 * 240 * sizeof(int), intBufSize + 256 * sizeof(int)));\r
1126 \r
1127     ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);\r
1128 \r
1129     GpuMat histBuf(1, 256 * 240, CV_32SC1, buf.ptr());\r
1130     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());\r
1131     GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);\r
1132 \r
1133     calcHist(src, hist, histBuf, s);\r
1134 \r
1135     cudaStream_t stream = StreamAccessor::getStream(s);\r
1136 \r
1137     NppStreamHandler h(stream);\r
1138 \r
1139     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );\r
1140     \r
1141     if (stream == 0)\r
1142         cudaSafeCall( cudaDeviceSynchronize() );\r
1143 \r
1144     equalizeHist_gpu(src, dst, lut.ptr<int>(), stream);\r
1145 }\r
1146 \r
1147 ////////////////////////////////////////////////////////////////////////\r
1148 // cornerHarris & minEgenVal\r
1149 \r
1150 namespace cv { namespace gpu { namespace imgproc {\r
1151 \r
1152     void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);\r
1153     void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1154     void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1155 \r
1156 }}}\r
1157 \r
1158 namespace \r
1159 {\r
1160     template <typename T>\r
1161     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1162     {   \r
1163         double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;\r
1164         if (ksize < 0) \r
1165             scale *= 2.;\r
1166         if (src.depth() == CV_8U)\r
1167             scale *= 255.;\r
1168         scale = 1./scale;\r
1169 \r
1170         Dx.create(src.size(), CV_32F);\r
1171         Dy.create(src.size(), CV_32F);\r
1172 \r
1173         if (ksize > 0)\r
1174         {\r
1175             Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);\r
1176             Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);\r
1177         }\r
1178         else\r
1179         {\r
1180             Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);\r
1181             Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);\r
1182         }\r
1183     }\r
1184 \r
1185     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1186     {\r
1187         switch (src.type())\r
1188         {\r
1189         case CV_8U:\r
1190             extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);\r
1191             break;\r
1192         case CV_32F:\r
1193             extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);\r
1194             break;\r
1195         default:\r
1196             CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");\r
1197         }\r
1198     }\r
1199 \r
1200 } // Anonymous namespace\r
1201 \r
1202 \r
1203 bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)\r
1204 {\r
1205     if (cpuBorderType == cv::BORDER_REFLECT101)\r
1206     {\r
1207         gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;\r
1208         return true;\r
1209     }\r
1210 \r
1211     if (cpuBorderType == cv::BORDER_REPLICATE)\r
1212     {\r
1213         gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
1214         return true;\r
1215     }\r
1216     \r
1217     if (cpuBorderType == cv::BORDER_CONSTANT)\r
1218     {\r
1219         gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
1220         return true;\r
1221     }\r
1222 \r
1223     return false;\r
1224 }\r
1225 \r
1226 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)\r
1227 {\r
1228     GpuMat Dx, Dy;\r
1229     cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);\r
1230 }\r
1231 \r
1232 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)\r
1233 {\r
1234     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1235               borderType == cv::BORDER_REPLICATE);\r
1236 \r
1237     int gpuBorderType;\r
1238     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1239 \r
1240     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);\r
1241     dst.create(src.size(), CV_32F);\r
1242     imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);\r
1243 }\r
1244 \r
1245 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)\r
1246 {  \r
1247     GpuMat Dx, Dy;\r
1248     cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);\r
1249 }\r
1250 \r
1251 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1252 {  \r
1253     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1254               borderType == cv::BORDER_REPLICATE);\r
1255 \r
1256     int gpuBorderType;\r
1257     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1258 \r
1259     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);    \r
1260     dst.create(src.size(), CV_32F);\r
1261     imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);\r
1262 }\r
1263 \r
1264 //////////////////////////////////////////////////////////////////////////////\r
1265 // mulSpectrums\r
1266 \r
1267 namespace cv { namespace gpu { namespace imgproc \r
1268 {\r
1269     void mulSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1270                       DevMem2D_<cufftComplex> c);\r
1271 \r
1272     void mulSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1273                            DevMem2D_<cufftComplex> c);\r
1274 }}}\r
1275 \r
1276 \r
1277 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, \r
1278                            int flags, bool conjB) \r
1279 {\r
1280     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>, \r
1281                            DevMem2D_<cufftComplex>);\r
1282     static Caller callers[] = { imgproc::mulSpectrums, \r
1283                                 imgproc::mulSpectrums_CONJ };\r
1284 \r
1285     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1286     CV_Assert(a.size() == b.size());\r
1287 \r
1288     c.create(a.size(), CV_32FC2);\r
1289 \r
1290     Caller caller = callers[(int)conjB];\r
1291     caller(a, b, c);\r
1292 }\r
1293 \r
1294 //////////////////////////////////////////////////////////////////////////////\r
1295 // mulAndScaleSpectrums\r
1296 \r
1297 namespace cv { namespace gpu { namespace imgproc \r
1298 {\r
1299     void mulAndScaleSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1300                              float scale, DevMem2D_<cufftComplex> c);\r
1301 \r
1302     void mulAndScaleSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1303                                   float scale, DevMem2D_<cufftComplex> c);\r
1304 }}}\r
1305 \r
1306 \r
1307 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,\r
1308                                   int flags, float scale, bool conjB) \r
1309 {\r
1310     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>,\r
1311                            float scale, DevMem2D_<cufftComplex>);\r
1312     static Caller callers[] = { imgproc::mulAndScaleSpectrums, \r
1313                                 imgproc::mulAndScaleSpectrums_CONJ };\r
1314 \r
1315     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1316     CV_Assert(a.size() == b.size());\r
1317 \r
1318     c.create(a.size(), CV_32FC2);\r
1319 \r
1320     Caller caller = callers[(int)conjB];\r
1321     caller(a, b, scale, c);\r
1322 }\r
1323 \r
1324 //////////////////////////////////////////////////////////////////////////////\r
1325 // dft\r
1326 \r
1327 void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)\r
1328 {\r
1329     CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);\r
1330 \r
1331     // We don't support unpacked output (in the case of real input)\r
1332     CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));\r
1333 \r
1334     bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);\r
1335     int is_row_dft = flags & DFT_ROWS;\r
1336     int is_scaled_dft = flags & DFT_SCALE;\r
1337     int is_inverse = flags & DFT_INVERSE;\r
1338     bool is_complex_input = src.channels() == 2;\r
1339     bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
1340 \r
1341     // We don't support real-to-real transform\r
1342     CV_Assert(is_complex_input || is_complex_output);\r
1343 \r
1344     GpuMat src_data;\r
1345 \r
1346     // Make sure here we work with the continuous input, \r
1347     // as CUFFT can't handle gaps\r
1348     src_data = src;\r
1349     createContinuous(src.rows, src.cols, src.type(), src_data);\r
1350     if (src_data.data != src.data)\r
1351         src.copyTo(src_data);\r
1352 \r
1353     Size dft_size_opt = dft_size;\r
1354     if (is_1d_input && !is_row_dft)\r
1355     {\r
1356         // If the source matrix is single column handle it as single row\r
1357         dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
1358         dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
1359     }\r
1360 \r
1361     cufftType dft_type = CUFFT_R2C;\r
1362     if (is_complex_input) \r
1363         dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
1364 \r
1365     CV_Assert(dft_size_opt.width > 1);\r
1366 \r
1367     cufftHandle plan;\r
1368     if (is_1d_input || is_row_dft)\r
1369         cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
1370     else\r
1371         cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
1372 \r
1373     if (is_complex_input)\r
1374     {\r
1375         if (is_complex_output)\r
1376         {\r
1377             createContinuous(dft_size, CV_32FC2, dst);\r
1378             cufftSafeCall(cufftExecC2C(\r
1379                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),\r
1380                     is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
1381         }\r
1382         else\r
1383         {\r
1384             createContinuous(dft_size, CV_32F, dst);\r
1385             cufftSafeCall(cufftExecC2R(\r
1386                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));\r
1387         }\r
1388     }\r
1389     else\r
1390     {\r
1391         // We could swap dft_size for efficiency. Here we must reflect it\r
1392         if (dft_size == dft_size_opt)\r
1393             createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
1394         else\r
1395             createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
1396 \r
1397         cufftSafeCall(cufftExecR2C(\r
1398                 plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));\r
1399     }\r
1400 \r
1401     cufftSafeCall(cufftDestroy(plan));\r
1402 \r
1403     if (is_scaled_dft)\r
1404         multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
1405 }\r
1406 \r
1407 //////////////////////////////////////////////////////////////////////////////\r
1408 // convolve\r
1409 \r
1410 \r
1411 void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)\r
1412 {\r
1413     result_size = Size(image_size.width - templ_size.width + 1,\r
1414                        image_size.height - templ_size.height + 1);\r
1415     block_size = estimateBlockSize(result_size, templ_size);\r
1416 \r
1417     dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);\r
1418     dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1);\r
1419     createContinuous(dft_size, CV_32F, image_block);\r
1420     createContinuous(dft_size, CV_32F, templ_block);\r
1421     createContinuous(dft_size, CV_32F, result_data);\r
1422 \r
1423     spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
1424     createContinuous(1, spect_len, CV_32FC2, image_spect);\r
1425     createContinuous(1, spect_len, CV_32FC2, templ_spect);\r
1426     createContinuous(1, spect_len, CV_32FC2, result_spect);\r
1427 \r
1428     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);\r
1429     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);\r
1430 }\r
1431 \r
1432 \r
1433 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
1434 {\r
1435     int scale = 40;\r
1436     Size bsize_min(1024, 1024);\r
1437 \r
1438     // Check whether we use Fermi generation or newer GPU\r
1439     if (DeviceInfo().majorVersion() >= 2)\r
1440     {\r
1441         bsize_min.width = 2048;\r
1442         bsize_min.height = 2048;\r
1443     }\r
1444 \r
1445     Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
1446                std::max(templ_size.height * scale, bsize_min.height));\r
1447 \r
1448     bsize.width = std::min(bsize.width, result_size.width);\r
1449     bsize.height = std::min(bsize.height, result_size.height);\r
1450     return bsize;\r
1451 }\r
1452 \r
1453 \r
1454 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1455                        bool ccorr)\r
1456 {\r
1457     ConvolveBuf buf;\r
1458     convolve(image, templ, result, ccorr, buf);\r
1459 }\r
1460 \r
1461 \r
1462 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1463                        bool ccorr, ConvolveBuf& buf)\r
1464 {\r
1465     StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();\r
1466     StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();\r
1467 \r
1468     CV_Assert(image.type() == CV_32F);\r
1469     CV_Assert(templ.type() == CV_32F);\r
1470 \r
1471     buf.create(image.size(), templ.size());\r
1472     result.create(buf.result_size, CV_32F);\r
1473 \r
1474     Size& block_size = buf.block_size;\r
1475     Size& dft_size = buf.dft_size;\r
1476 \r
1477     GpuMat& image_block = buf.image_block;\r
1478     GpuMat& templ_block = buf.templ_block;\r
1479     GpuMat& result_data = buf.result_data;\r
1480 \r
1481     GpuMat& image_spect = buf.image_spect;\r
1482     GpuMat& templ_spect = buf.templ_spect;\r
1483     GpuMat& result_spect = buf.result_spect;\r
1484 \r
1485     cufftHandle planR2C, planC2R;\r
1486     cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
1487     cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
1488 \r
1489     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
1490     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
1491                    templ_block.cols - templ_roi.cols, 0);\r
1492 \r
1493     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
1494                                templ_spect.ptr<cufftComplex>()));\r
1495 \r
1496     // Process all blocks of the result matrix\r
1497     for (int y = 0; y < result.rows; y += block_size.height)\r
1498     {\r
1499         for (int x = 0; x < result.cols; x += block_size.width)\r
1500         {\r
1501             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
1502                                 std::min(y + dft_size.height, image.rows) - y);\r
1503             GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
1504                              image.step);\r
1505             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
1506                            0, image_block.cols - image_roi.cols, 0);\r
1507 \r
1508             cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
1509                                        image_spect.ptr<cufftComplex>()));\r
1510             mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
1511                                  1.f / dft_size.area(), ccorr);\r
1512             cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
1513                                        result_data.ptr<cufftReal>()));\r
1514 \r
1515             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
1516                                  std::min(y + block_size.height, result.rows) - y);\r
1517             GpuMat result_roi(result_roi_size, result.type(), \r
1518                               (void*)(result.ptr<float>(y) + x), result.step);\r
1519             GpuMat result_block(result_roi_size, result_data.type(), \r
1520                                 result_data.ptr(), result_data.step);\r
1521             result_block.copyTo(result_roi);\r
1522         }\r
1523     }\r
1524 \r
1525     cufftSafeCall(cufftDestroy(planR2C));\r
1526     cufftSafeCall(cufftDestroy(planC2R));\r
1527 }\r
1528 \r
1529 \r
1530 ////////////////////////////////////////////////////////////////////\r
1531 // downsample\r
1532 \r
1533 namespace cv { namespace gpu { namespace imgproc\r
1534 {\r
1535     template <typename T, int cn>\r
1536     void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1537 }}}\r
1538 \r
1539 \r
1540 void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1541 {\r
1542     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1543 \r
1544     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1545     static const Caller callers[6][4] =\r
1546         {{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,\r
1547           imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},\r
1548          {0,0,0,0}, {0,0,0,0},\r
1549          {imgproc::downsampleCaller<short,1>, imgproc::downsampleCaller<short,2>,\r
1550           imgproc::downsampleCaller<short,3>, imgproc::downsampleCaller<short,4>},\r
1551          {0,0,0,0},\r
1552          {imgproc::downsampleCaller<float,1>, imgproc::downsampleCaller<float,2>,\r
1553           imgproc::downsampleCaller<float,3>, imgproc::downsampleCaller<float,4>}};\r
1554 \r
1555     Caller caller = callers[src.depth()][src.channels()-1];\r
1556     if (!caller)\r
1557         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1558 \r
1559     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1560     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1561 }\r
1562 \r
1563 \r
1564 //////////////////////////////////////////////////////////////////////////////\r
1565 // upsample\r
1566 \r
1567 namespace cv { namespace gpu { namespace imgproc\r
1568 {\r
1569     template <typename T, int cn>\r
1570     void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1571 }}}\r
1572 \r
1573 \r
1574 void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1575 {\r
1576     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1577 \r
1578     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1579     static const Caller callers[6][5] =\r
1580         {{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,\r
1581           imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},\r
1582          {0,0,0,0}, {0,0,0,0},\r
1583          {imgproc::upsampleCaller<short,1>, imgproc::upsampleCaller<short,2>,\r
1584           imgproc::upsampleCaller<short,3>, imgproc::upsampleCaller<short,4>},\r
1585          {0,0,0,0},\r
1586          {imgproc::upsampleCaller<float,1>, imgproc::upsampleCaller<float,2>,\r
1587           imgproc::upsampleCaller<float,3>, imgproc::upsampleCaller<float,4>}};\r
1588 \r
1589     Caller caller = callers[src.depth()][src.channels()-1];\r
1590     if (!caller)\r
1591         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1592 \r
1593     dst.create(src.rows*2, src.cols*2, src.type());\r
1594     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1595 }\r
1596 \r
1597 \r
1598 //////////////////////////////////////////////////////////////////////////////\r
1599 // pyrDown\r
1600 \r
1601 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1602 {\r
1603     PyrDownBuf buf;\r
1604     pyrDown(src, dst, buf, stream);\r
1605 }\r
1606 \r
1607 cv::Mat cv::gpu::PyrDownBuf::ker;\r
1608 \r
1609 void cv::gpu::PyrDownBuf::create(Size image_size, int image_type_)\r
1610 {\r
1611     if (ker.empty() || image_type_ != image_type)\r
1612         ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_)));\r
1613 \r
1614     ensureSizeIsEnough(image_size.height, image_size.width, image_type_, buf);\r
1615 \r
1616     if (filter.empty() || image_type_ != image_type)\r
1617     {\r
1618         image_type = image_type_;\r
1619         filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);\r
1620     }\r
1621 }\r
1622 \r
1623 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream)\r
1624 {\r
1625     buf.create(src.size(), src.type());\r
1626     buf.filter->apply(src, buf.buf, Rect(0, 0, src.cols, src.rows), stream);\r
1627     downsample(buf.buf, dst, stream);\r
1628 }\r
1629 \r
1630 \r
1631 //////////////////////////////////////////////////////////////////////////////\r
1632 // pyrUp\r
1633 \r
1634 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1635 {\r
1636     PyrUpBuf buf;\r
1637     pyrUp(src, dst, buf, stream);\r
1638 }\r
1639 \r
1640 cv::Mat cv::gpu::PyrUpBuf::ker;\r
1641 \r
1642 void cv::gpu::PyrUpBuf::create(Size image_size, int image_type_)\r
1643 {\r
1644     if (ker.empty() || image_type_ != image_type)\r
1645         ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_))) * 2;\r
1646 \r
1647     ensureSizeIsEnough(image_size.height * 2, image_size.width * 2, image_type_, buf);\r
1648 \r
1649     if (filter.empty() || image_type_ != image_type)\r
1650     {\r
1651         image_type = image_type_;\r
1652         filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);\r
1653     }\r
1654 }\r
1655 \r
1656 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream)\r
1657 {\r
1658     buf.create(src.size(), src.type());\r
1659     upsample(src, buf.buf, stream);\r
1660     buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream);\r
1661 }\r
1662 \r
1663 \r
1664 //////////////////////////////////////////////////////////////////////////////\r
1665 // Canny\r
1666 \r
1667 cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)\r
1668 {\r
1669     CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());\r
1670 \r
1671     create(dx_.size(), -1);\r
1672 }\r
1673 \r
1674 void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)\r
1675 {\r
1676     ensureSizeIsEnough(image_size, CV_32SC1, dx);\r
1677     ensureSizeIsEnough(image_size, CV_32SC1, dy);\r
1678 \r
1679     if (apperture_size == 3)\r
1680     {\r
1681         ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);\r
1682         ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);\r
1683     }\r
1684     else if(apperture_size > 0)\r
1685     {\r
1686         if (!filterDX)\r
1687             filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);\r
1688         if (!filterDY)\r
1689             filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);\r
1690     }\r
1691 \r
1692     ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);\r
1693 \r
1694     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);\r
1695     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);\r
1696 }\r
1697 \r
1698 void cv::gpu::CannyBuf::release()\r
1699 {\r
1700     dx.release();\r
1701     dy.release();\r
1702     dx_buf.release();\r
1703     dy_buf.release();\r
1704     edgeBuf.release();\r
1705     trackBuf1.release();\r
1706     trackBuf2.release();\r
1707 }\r
1708 \r
1709 namespace cv { namespace gpu { namespace canny\r
1710 {    \r
1711     void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);\r
1712 \r
1713     void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1714     void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1715 \r
1716     void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);\r
1717     \r
1718     void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);\r
1719 \r
1720     void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);\r
1721 \r
1722     void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);\r
1723 }}}\r
1724 \r
1725 namespace\r
1726 {\r
1727     void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)\r
1728     {\r
1729         using namespace cv::gpu::canny;\r
1730 \r
1731         calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);\r
1732         \r
1733         edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);\r
1734         \r
1735         edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);\r
1736         \r
1737         getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);\r
1738     }\r
1739 }\r
1740 \r
1741 void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1742 {\r
1743     CannyBuf buf(src.size(), apperture_size);\r
1744     Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);\r
1745 }\r
1746 \r
1747 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1748 {\r
1749     using namespace cv::gpu::canny;\r
1750 \r
1751     CV_Assert(src.type() == CV_8UC1);\r
1752 \r
1753     if( low_thresh > high_thresh )\r
1754         std::swap( low_thresh, high_thresh);\r
1755 \r
1756     dst.create(src.size(), CV_8U);\r
1757     dst.setTo(Scalar::all(0));\r
1758     \r
1759     buf.create(src.size(), apperture_size);\r
1760     buf.edgeBuf.setTo(Scalar::all(0));\r
1761 \r
1762     if (apperture_size == 3)\r
1763     {\r
1764         calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);\r
1765 \r
1766         calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1767     }\r
1768     else\r
1769     {\r
1770         buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));\r
1771         buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));\r
1772 \r
1773         calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1774     }\r
1775 \r
1776     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1777 }\r
1778 \r
1779 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1780 {\r
1781     CannyBuf buf(dx, dy);\r
1782     Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);\r
1783 }\r
1784 \r
1785 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1786 {\r
1787     using namespace cv::gpu::canny;\r
1788 \r
1789     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());\r
1790 \r
1791     if( low_thresh > high_thresh )\r
1792         std::swap( low_thresh, high_thresh);\r
1793 \r
1794     dst.create(dx.size(), CV_8U);\r
1795     dst.setTo(Scalar::all(0));\r
1796     \r
1797     buf.dx = dx; buf.dy = dy;\r
1798     buf.create(dx.size(), -1);\r
1799     buf.edgeBuf.setTo(Scalar::all(0));\r
1800 \r
1801     calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);\r
1802 \r
1803     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1804 }\r
1805 \r
1806 #endif /* !defined (HAVE_CUDA) */\r
1807 \r
1808 \r