modules/gpu/src/imgproc.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "precomp.hpp"\r
  44 \r
  45 using namespace cv;\r
  46 using namespace cv::gpu;\r
  47 \r
  48 #if !defined (HAVE_CUDA)\r
  49 \r
  50 void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&){ throw_nogpu(); }\r
  51 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  52 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  53 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  54 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  55 void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
  56 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  57 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  58 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  59 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, double, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  60 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  61 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  62 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
  63 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  64 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  65 void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  66 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  67 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
  68 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }\r
  69 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_nogpu(); }\r
  70 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  71 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  72 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  73 void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  74 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }\r
  75 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  76 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }\r
  77 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_nogpu(); }\r
  78 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  79 void cv::gpu::calcHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  80 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  81 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  82 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  83 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  84 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  85 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  86 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  87 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }\r
  88 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }\r
  89 void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }\r
  90 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
  91 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
  92 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
  93 void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  94 void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  95 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  96 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  97 void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  98 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  99 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 100 void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 101 cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }\r
 102 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }\r
 103 void cv::gpu::CannyBuf::release() { throw_nogpu(); }\r
 104 \r
 105 #else /* !defined (HAVE_CUDA) */\r
 106 \r
 107 namespace cv { namespace gpu {  namespace imgproc\r
 108 {\r
 109     void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 110     void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 111 \r
 112     extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
 113     extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
 114 \r
 115     void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 116     void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 117 \r
 118     void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 119     void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 120 }}}\r
 121 \r
 122 ////////////////////////////////////////////////////////////////////////\r
 123 // remap\r
 124 \r
 125 void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)\r
 126 {\r
 127     typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
 128     static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};\r
 129 \r
 130     CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);\r
 131 \r
 132     dst.create(xmap.size(), src.type());\r
 133 \r
 134     callers[src.channels() - 1](src, xmap, ymap, dst);\r
 135 }\r
 136 \r
 137 ////////////////////////////////////////////////////////////////////////\r
 138 // meanShiftFiltering_GPU\r
 139 \r
 140 void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)\r
 141 {\r
 142     if( src.empty() )\r
 143         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 144 \r
 145     if( src.depth() != CV_8U || src.channels() != 4 )\r
 146         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 147 \r
 148     dst.create( src.size(), CV_8UC4 );\r
 149 \r
 150     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 151         criteria.maxCount = 5;\r
 152 \r
 153     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 154 \r
 155     float eps;\r
 156     if( !(criteria.type & TermCriteria::EPS) )\r
 157         eps = 1.f;\r
 158     eps = (float)std::max(criteria.epsilon, 0.0);\r
 159 \r
 160     imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);\r
 161 }\r
 162 \r
 163 ////////////////////////////////////////////////////////////////////////\r
 164 // meanShiftProc_GPU\r
 165 \r
 166 void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)\r
 167 {\r
 168     if( src.empty() )\r
 169         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 170 \r
 171     if( src.depth() != CV_8U || src.channels() != 4 )\r
 172         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 173 \r
 174     dstr.create( src.size(), CV_8UC4 );\r
 175     dstsp.create( src.size(), CV_16SC2 );\r
 176 \r
 177     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 178         criteria.maxCount = 5;\r
 179 \r
 180     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 181 \r
 182     float eps;\r
 183     if( !(criteria.type & TermCriteria::EPS) )\r
 184         eps = 1.f;\r
 185     eps = (float)std::max(criteria.epsilon, 0.0);\r
 186 \r
 187     imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);\r
 188 }\r
 189 \r
 190 ////////////////////////////////////////////////////////////////////////\r
 191 // drawColorDisp\r
 192 \r
 193 namespace\r
 194 {\r
 195     template <typename T>\r
 196     void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)\r
 197     {\r
 198         dst.create(src.size(), CV_8UC4);\r
 199 \r
 200         imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);\r
 201     }\r
 202 \r
 203     typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);\r
 204 \r
 205     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};\r
 206 }\r
 207 \r
 208 void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)\r
 209 {\r
 210     CV_Assert(src.type() == CV_8U || src.type() == CV_16S);\r
 211 \r
 212     drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));\r
 213 }\r
 214 \r
 215 ////////////////////////////////////////////////////////////////////////\r
 216 // reprojectImageTo3D\r
 217 \r
 218 namespace\r
 219 {\r
 220     template <typename T>\r
 221     void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)\r
 222     {\r
 223         xyzw.create(disp.rows, disp.cols, CV_32FC4);\r
 224         imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
 225     }\r
 226 \r
 227     typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);\r
 228 \r
 229     const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};\r
 230 }\r
 231 \r
 232 void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream)\r
 233 {\r
 234     CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);\r
 235 \r
 236     reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));\r
 237 }\r
 238 \r
 239 ////////////////////////////////////////////////////////////////////////\r
 240 // resize\r
 241 \r
 242 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
 243 {\r
 244     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
 245 \r
 246     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 247     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
 248 \r
 249     CV_Assert( src.size().area() > 0 );\r
 250     CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
 251 \r
 252     if( dsize == Size() )\r
 253     {\r
 254         dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));\r
 255     }\r
 256     else\r
 257     {\r
 258         fx = (double)dsize.width / src.cols;\r
 259         fy = (double)dsize.height / src.rows;\r
 260     }\r
 261 \r
 262     dst.create(dsize, src.type());\r
 263 \r
 264     NppiSize srcsz;\r
 265     srcsz.width  = src.cols;\r
 266     srcsz.height = src.rows;\r
 267     NppiRect srcrect;\r
 268     srcrect.x = srcrect.y = 0;\r
 269     srcrect.width  = src.cols;\r
 270     srcrect.height = src.rows;\r
 271     NppiSize dstsz;\r
 272     dstsz.width  = dst.cols;\r
 273     dstsz.height = dst.rows;\r
 274 \r
 275     cudaStream_t stream = StreamAccessor::getStream(s);\r
 276 \r
 277     NppStreamHandler h(stream);\r
 278 \r
 279     if (src.type() == CV_8UC1)\r
 280     {\r
 281         nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 282             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 283     }\r
 284     else\r
 285     {\r
 286         nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 287             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 288     }\r
 289 \r
 290     if (stream == 0)\r
 291         cudaSafeCall( cudaDeviceSynchronize() );\r
 292 }\r
 293 \r
 294 ////////////////////////////////////////////////////////////////////////\r
 295 // copyMakeBorder\r
 296 \r
 297 void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)\r
 298 {\r
 299     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
 300 \r
 301     dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
 302 \r
 303     NppiSize srcsz;\r
 304     srcsz.width  = src.cols;\r
 305     srcsz.height = src.rows;\r
 306     NppiSize dstsz;\r
 307     dstsz.width  = dst.cols;\r
 308     dstsz.height = dst.rows;\r
 309 \r
 310     cudaStream_t stream = StreamAccessor::getStream(s);\r
 311 \r
 312     NppStreamHandler h(stream);\r
 313 \r
 314     switch (src.type())\r
 315     {\r
 316     case CV_8UC1:\r
 317         {\r
 318             Npp8u nVal = static_cast<Npp8u>(value[0]);\r
 319             nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 320                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 321             break;\r
 322         }\r
 323     case CV_8UC4:\r
 324         {\r
 325             Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};\r
 326             nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 327                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 328             break;\r
 329         }\r
 330     case CV_32SC1:\r
 331         {\r
 332             Npp32s nVal = static_cast<Npp32s>(value[0]);\r
 333             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 334                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 335             break;\r
 336         }\r
 337     case CV_32FC1:\r
 338         {\r
 339             Npp32f val = static_cast<Npp32f>(value[0]);\r
 340             Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
 341             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 342                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 343             break;\r
 344         }\r
 345     default:\r
 346         CV_Assert(!"Unsupported source type");\r
 347     }\r
 348 \r
 349     if (stream == 0)\r
 350         cudaSafeCall( cudaDeviceSynchronize() );\r
 351 }\r
 352 \r
 353 ////////////////////////////////////////////////////////////////////////\r
 354 // warp\r
 355 \r
 356 namespace\r
 357 {\r
 358     typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,\r
 359                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 360                                        int interpolation);\r
 361     typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,\r
 362                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 363                                        int interpolation);\r
 364     typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,\r
 365                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 366                                        int interpolation);\r
 367     typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,\r
 368                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 369                                        int interpolation);\r
 370 \r
 371     void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,\r
 372                        npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],\r
 373                        npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)\r
 374     {\r
 375         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 376 \r
 377         int interpolation = flags & INTER_MAX;\r
 378 \r
 379         CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);\r
 380         CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 381 \r
 382         dst.create(dsize, src.type());\r
 383 \r
 384         NppiSize srcsz;\r
 385         srcsz.height = src.rows;\r
 386         srcsz.width = src.cols;\r
 387         NppiRect srcroi;\r
 388         srcroi.x = srcroi.y = 0;\r
 389         srcroi.height = src.rows;\r
 390         srcroi.width = src.cols;\r
 391         NppiRect dstroi;\r
 392         dstroi.x = dstroi.y = 0;\r
 393         dstroi.height = dst.rows;\r
 394         dstroi.width = dst.cols;\r
 395 \r
 396         int warpInd = (flags & WARP_INVERSE_MAP) >> 4;\r
 397 \r
 398         NppStreamHandler h(stream);\r
 399 \r
 400         switch (src.depth())\r
 401         {\r
 402         case CV_8U:\r
 403             nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 404                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 405             break;\r
 406         case CV_16U:\r
 407             nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 408                 dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 409             break;\r
 410         case CV_32S:\r
 411             nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, static_cast<int>(src.step), srcroi,\r
 412                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 413             break;\r
 414         case CV_32F:\r
 415             nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, static_cast<int>(src.step), srcroi,\r
 416                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 417             break;\r
 418         default:\r
 419             CV_Assert(!"Unsupported source type");\r
 420         }\r
 421 \r
 422         if (stream == 0)\r
 423             cudaSafeCall( cudaDeviceSynchronize() );\r
 424     }\r
 425 }\r
 426 \r
 427 void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 428 {\r
 429     static npp_warp_8u_t npp_warpAffine_8u[][2] =\r
 430         {\r
 431             {0, 0},\r
 432             {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},\r
 433             {0, 0},\r
 434             {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},\r
 435             {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}\r
 436         };\r
 437     static npp_warp_16u_t npp_warpAffine_16u[][2] =\r
 438         {\r
 439             {0, 0},\r
 440             {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},\r
 441             {0, 0},\r
 442             {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},\r
 443             {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}\r
 444         };\r
 445     static npp_warp_32s_t npp_warpAffine_32s[][2] =\r
 446         {\r
 447             {0, 0},\r
 448             {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},\r
 449             {0, 0},\r
 450             {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},\r
 451             {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}\r
 452         };\r
 453     static npp_warp_32f_t npp_warpAffine_32f[][2] =\r
 454         {\r
 455             {0, 0},\r
 456             {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},\r
 457             {0, 0},\r
 458             {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},\r
 459             {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}\r
 460         };\r
 461 \r
 462     CV_Assert(M.rows == 2 && M.cols == 3);\r
 463 \r
 464     double coeffs[2][3];\r
 465     Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);\r
 466     M.convertTo(coeffsMat, coeffsMat.type());\r
 467 \r
 468     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));\r
 469 }\r
 470 \r
 471 void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 472 {\r
 473     static npp_warp_8u_t npp_warpPerspective_8u[][2] =\r
 474         {\r
 475             {0, 0},\r
 476             {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},\r
 477             {0, 0},\r
 478             {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},\r
 479             {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}\r
 480         };\r
 481     static npp_warp_16u_t npp_warpPerspective_16u[][2] =\r
 482         {\r
 483             {0, 0},\r
 484             {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},\r
 485             {0, 0},\r
 486             {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},\r
 487             {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}\r
 488         };\r
 489     static npp_warp_32s_t npp_warpPerspective_32s[][2] =\r
 490         {\r
 491             {0, 0},\r
 492             {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},\r
 493             {0, 0},\r
 494             {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},\r
 495             {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}\r
 496         };\r
 497     static npp_warp_32f_t npp_warpPerspective_32f[][2] =\r
 498         {\r
 499             {0, 0},\r
 500             {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},\r
 501             {0, 0},\r
 502             {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},\r
 503             {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}\r
 504         };\r
 505 \r
 506     CV_Assert(M.rows == 3 && M.cols == 3);\r
 507 \r
 508     double coeffs[3][3];\r
 509     Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);\r
 510     M.convertTo(coeffsMat, coeffsMat.type());\r
 511 \r
 512     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));\r
 513 }\r
 514 \r
 515 //////////////////////////////////////////////////////////////////////////////\r
 516 // buildWarpPlaneMaps\r
 517 \r
 518 namespace cv { namespace gpu { namespace imgproc\r
 519 {\r
 520     void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 521                             const float r[9], const float rinv[9], float f, float s, float dist,\r
 522                             float half_w, float half_h, cudaStream_t stream);\r
 523 }}}\r
 524 \r
 525 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 526                                  double dist, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 527 {\r
 528     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 529     Mat Rinv = R.inv();\r
 530     CV_Assert(Rinv.isContinuous());\r
 531 \r
 532     map_x.create(dst_roi.size(), CV_32F);\r
 533     map_y.create(dst_roi.size(), CV_32F);\r
 534     imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 535                                 static_cast<float>(f), static_cast<float>(s), static_cast<float>(dist), \r
 536                                 0.5f*src_size.width, 0.5f*src_size.height, StreamAccessor::getStream(stream));\r
 537 }\r
 538 \r
 539 //////////////////////////////////////////////////////////////////////////////\r
 540 // buildWarpCylyndricalMaps\r
 541 \r
 542 namespace cv { namespace gpu { namespace imgproc\r
 543 {\r
 544     void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 545                                   const float r[9], const float rinv[9], float f, float s,\r
 546                                   float half_w, float half_h, cudaStream_t stream);\r
 547 }}}\r
 548 \r
 549 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 550                                        GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 551 {\r
 552     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 553     Mat Rinv = R.inv();\r
 554     CV_Assert(Rinv.isContinuous());\r
 555 \r
 556     map_x.create(dst_roi.size(), CV_32F);\r
 557     map_y.create(dst_roi.size(), CV_32F);\r
 558     imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 559                                       static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 560                                       StreamAccessor::getStream(stream));\r
 561 }\r
 562 \r
 563 \r
 564 //////////////////////////////////////////////////////////////////////////////\r
 565 // buildWarpSphericalMaps\r
 566 \r
 567 namespace cv { namespace gpu { namespace imgproc\r
 568 {\r
 569     void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 570                                 const float r[9], const float rinv[9], float f, float s,\r
 571                                 float half_w, float half_h, cudaStream_t stream);\r
 572 }}}\r
 573 \r
 574 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 575                                      GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 576 {\r
 577     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 578     Mat Rinv = R.inv();\r
 579     CV_Assert(Rinv.isContinuous());\r
 580 \r
 581     map_x.create(dst_roi.size(), CV_32F);\r
 582     map_y.create(dst_roi.size(), CV_32F);\r
 583     imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 584                                     static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 585                                     StreamAccessor::getStream(stream));\r
 586 }\r
 587 \r
 588 ////////////////////////////////////////////////////////////////////////\r
 589 // rotate\r
 590 \r
 591 void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)\r
 592 {\r
 593     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 594 \r
 595     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 596     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 597 \r
 598     dst.create(dsize, src.type());\r
 599 \r
 600     NppiSize srcsz;\r
 601     srcsz.height = src.rows;\r
 602     srcsz.width = src.cols;\r
 603     NppiRect srcroi;\r
 604     srcroi.x = srcroi.y = 0;\r
 605     srcroi.height = src.rows;\r
 606     srcroi.width = src.cols;\r
 607     NppiRect dstroi;\r
 608     dstroi.x = dstroi.y = 0;\r
 609     dstroi.height = dst.rows;\r
 610     dstroi.width = dst.cols;\r
 611 \r
 612     cudaStream_t stream = StreamAccessor::getStream(s);\r
 613 \r
 614     NppStreamHandler h(stream);\r
 615 \r
 616     if (src.type() == CV_8UC1)\r
 617     {\r
 618         nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 619             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 620     }\r
 621     else\r
 622     {\r
 623         nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 624             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 625     }\r
 626 \r
 627     if (stream == 0)\r
 628         cudaSafeCall( cudaDeviceSynchronize() );\r
 629 }\r
 630 \r
 631 ////////////////////////////////////////////////////////////////////////\r
 632 // integral\r
 633 \r
 634 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)\r
 635 {\r
 636     GpuMat buffer;\r
 637     integralBuffered(src, sum, buffer, s);\r
 638 }\r
 639 \r
 640 void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)\r
 641 {\r
 642     CV_Assert(src.type() == CV_8UC1);\r
 643 \r
 644     sum.create(src.rows + 1, src.cols + 1, CV_32S);\r
 645     \r
 646     NcvSize32u roiSize;\r
 647     roiSize.width = src.cols;\r
 648     roiSize.height = src.rows;\r
 649 \r
 650         cudaDeviceProp prop;\r
 651         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 652 \r
 653     Ncv32u bufSize;\r
 654     nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
 655     ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);\r
 656 \r
 657     cudaStream_t stream = StreamAccessor::getStream(s);\r
 658 \r
 659     NppStStreamHandler h(stream);\r
 660 \r
 661     nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step), \r
 662         sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );\r
 663 \r
 664     if (stream == 0)\r
 665         cudaSafeCall( cudaDeviceSynchronize() );\r
 666 }\r
 667 \r
 668 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)\r
 669 {\r
 670     CV_Assert(src.type() == CV_8UC1);\r
 671 \r
 672     int width = src.cols + 1, height = src.rows + 1;\r
 673 \r
 674     sum.create(height, width, CV_32S);\r
 675     sqsum.create(height, width, CV_32F);\r
 676 \r
 677     NppiSize sz;\r
 678     sz.width = src.cols;\r
 679     sz.height = src.rows;\r
 680 \r
 681     cudaStream_t stream = StreamAccessor::getStream(s);\r
 682 \r
 683     NppStreamHandler h(stream);\r
 684 \r
 685     nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step), \r
 686         sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );\r
 687 \r
 688     if (stream == 0)\r
 689         cudaSafeCall( cudaDeviceSynchronize() );\r
 690 }\r
 691 \r
 692 //////////////////////////////////////////////////////////////////////////////\r
 693 // sqrIntegral\r
 694 \r
 695 void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)\r
 696 {\r
 697     CV_Assert(src.type() == CV_8U);\r
 698 \r
 699     NcvSize32u roiSize;\r
 700     roiSize.width = src.cols;\r
 701     roiSize.height = src.rows;\r
 702 \r
 703         cudaDeviceProp prop;\r
 704         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 705 \r
 706     Ncv32u bufSize;\r
 707     nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));       \r
 708     GpuMat buf(1, bufSize, CV_8U);\r
 709 \r
 710     cudaStream_t stream = StreamAccessor::getStream(s);\r
 711 \r
 712     NppStStreamHandler h(stream);\r
 713 \r
 714     sqsum.create(src.rows + 1, src.cols + 1, CV_64F);\r
 715     nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step), \r
 716             sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));\r
 717 \r
 718     if (stream == 0)\r
 719         cudaSafeCall( cudaDeviceSynchronize() );\r
 720 }\r
 721 \r
 722 //////////////////////////////////////////////////////////////////////////////\r
 723 // columnSum\r
 724 \r
 725 namespace cv { namespace gpu { namespace imgproc\r
 726 {\r
 727     void columnSum_32F(const DevMem2D src, const DevMem2D dst);\r
 728 }}}\r
 729 \r
 730 void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)\r
 731 {\r
 732     CV_Assert(src.type() == CV_32F);\r
 733 \r
 734     dst.create(src.size(), CV_32F);\r
 735     imgproc::columnSum_32F(src, dst);\r
 736 }\r
 737 \r
 738 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)\r
 739 {\r
 740     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);\r
 741 \r
 742     dst.create(src.size(), CV_32FC1);\r
 743 \r
 744     NppiSize sz;\r
 745     sz.width = src.cols;\r
 746     sz.height = src.rows;\r
 747 \r
 748     NppiRect nppRect;\r
 749     nppRect.height = rect.height;\r
 750     nppRect.width = rect.width;\r
 751     nppRect.x = rect.x;\r
 752     nppRect.y = rect.y;\r
 753 \r
 754     cudaStream_t stream = StreamAccessor::getStream(s);\r
 755 \r
 756     NppStreamHandler h(stream);\r
 757 \r
 758     nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),\r
 759                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );\r
 760 \r
 761     if (stream == 0)\r
 762         cudaSafeCall( cudaDeviceSynchronize() );\r
 763 }\r
 764 \r
 765 \r
 766 ////////////////////////////////////////////////////////////////////////\r
 767 // Histogram\r
 768 \r
 769 namespace\r
 770 {\r
 771     template<int n> struct NPPTypeTraits;\r
 772     template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };\r
 773     template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
 774     template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
 775     template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
 776 \r
 777     typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);\r
 778     typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);\r
 779 \r
 780     template<int SDEPTH> struct NppHistogramEvenFuncC1\r
 781     {\r
 782         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 783 \r
 784         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,\r
 785                     int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);\r
 786     };\r
 787     template<int SDEPTH> struct NppHistogramEvenFuncC4\r
 788     {\r
 789         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 790 \r
 791         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,\r
 792             Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);\r
 793     };\r
 794 \r
 795     template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 796     struct NppHistogramEvenC1\r
 797     {\r
 798         typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;\r
 799 \r
 800         static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)\r
 801         {\r
 802             int levels = histSize + 1;\r
 803             hist.create(1, histSize, CV_32S);\r
 804 \r
 805             NppiSize sz;\r
 806             sz.width = src.cols;\r
 807             sz.height = src.rows;\r
 808 \r
 809             int buf_size;\r
 810             get_buf_size(sz, levels, &buf_size);\r
 811 \r
 812             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 813 \r
 814             NppStreamHandler h(stream);\r
 815 \r
 816             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,\r
 817                 lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 818 \r
 819             if (stream == 0)\r
 820                 cudaSafeCall( cudaDeviceSynchronize() );\r
 821         }\r
 822     };\r
 823     template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 824     struct NppHistogramEvenC4\r
 825     {\r
 826         typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;\r
 827 \r
 828         static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)\r
 829         {\r
 830             int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};\r
 831             hist[0].create(1, histSize[0], CV_32S);\r
 832             hist[1].create(1, histSize[1], CV_32S);\r
 833             hist[2].create(1, histSize[2], CV_32S);\r
 834             hist[3].create(1, histSize[3], CV_32S);\r
 835 \r
 836             NppiSize sz;\r
 837             sz.width = src.cols;\r
 838             sz.height = src.rows;\r
 839 \r
 840             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 841 \r
 842             int buf_size;\r
 843             get_buf_size(sz, levels, &buf_size);\r
 844 \r
 845             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 846 \r
 847             NppStreamHandler h(stream);\r
 848 \r
 849             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 850 \r
 851             if (stream == 0)\r
 852                 cudaSafeCall( cudaDeviceSynchronize() );\r
 853         }\r
 854     };\r
 855 \r
 856     template<int SDEPTH> struct NppHistogramRangeFuncC1\r
 857     {\r
 858         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 859         typedef Npp32s level_t;\r
 860         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 861 \r
 862         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 863             const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);\r
 864     };\r
 865     template<> struct NppHistogramRangeFuncC1<CV_32F>\r
 866     {\r
 867         typedef Npp32f src_t;\r
 868         typedef Npp32f level_t;\r
 869         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 870 \r
 871         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 872             const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);\r
 873     };\r
 874     template<int SDEPTH> struct NppHistogramRangeFuncC4\r
 875     {\r
 876         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 877         typedef Npp32s level_t;\r
 878         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 879 \r
 880         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 881             const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 882     };\r
 883     template<> struct NppHistogramRangeFuncC4<CV_32F>\r
 884     {\r
 885         typedef Npp32f src_t;\r
 886         typedef Npp32f level_t;\r
 887         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 888 \r
 889         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 890             const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 891     };\r
 892 \r
 893     template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 894     struct NppHistogramRangeC1\r
 895     {\r
 896         typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;\r
 897         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 898         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 899 \r
 900         static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream)\r
 901         {\r
 902             CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);\r
 903 \r
 904             hist.create(1, levels.cols - 1, CV_32S);\r
 905 \r
 906             NppiSize sz;\r
 907             sz.width = src.cols;\r
 908             sz.height = src.rows;\r
 909 \r
 910             int buf_size;\r
 911             get_buf_size(sz, levels.cols, &buf_size);\r
 912             \r
 913             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 914 \r
 915             NppStreamHandler h(stream);\r
 916 \r
 917             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );\r
 918 \r
 919             if (stream == 0)\r
 920                 cudaSafeCall( cudaDeviceSynchronize() );\r
 921         }\r
 922     };\r
 923     template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 924     struct NppHistogramRangeC4\r
 925     {\r
 926         typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;\r
 927         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 928         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 929 \r
 930         static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream)\r
 931         {\r
 932             CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);\r
 933             CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);\r
 934             CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1);\r
 935             CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1);\r
 936 \r
 937             hist[0].create(1, levels[0].cols - 1, CV_32S);\r
 938             hist[1].create(1, levels[1].cols - 1, CV_32S);\r
 939             hist[2].create(1, levels[2].cols - 1, CV_32S);\r
 940             hist[3].create(1, levels[3].cols - 1, CV_32S);\r
 941 \r
 942             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 943             int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};\r
 944             const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};\r
 945 \r
 946             NppiSize sz;\r
 947             sz.width = src.cols;\r
 948             sz.height = src.rows;\r
 949 \r
 950             int buf_size;\r
 951             get_buf_size(sz, nLevels, &buf_size);\r
 952 \r
 953             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 954 \r
 955             NppStreamHandler h(stream);\r
 956 \r
 957             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );\r
 958 \r
 959             if (stream == 0)\r
 960                 cudaSafeCall( cudaDeviceSynchronize() );\r
 961         }\r
 962     };\r
 963 }\r
 964 \r
 965 void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)\r
 966 {\r
 967     Mat host_levels(1, nLevels, CV_32SC1);\r
 968     nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );\r
 969     levels.upload(host_levels);\r
 970 }\r
 971 \r
 972 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
 973 {\r
 974     GpuMat buf;\r
 975     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
 976 }\r
 977 \r
 978 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
 979 {\r
 980     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );\r
 981 \r
 982     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);\r
 983     static const hist_t hist_callers[] =\r
 984     {\r
 985         NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,\r
 986         0,\r
 987         NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,\r
 988         NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist\r
 989     };\r
 990 \r
 991     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
 992 }\r
 993 \r
 994 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
 995 {\r
 996     GpuMat buf;\r
 997     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
 998 }\r
 999 \r
1000 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1001 {\r
1002     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );\r
1003 \r
1004     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);\r
1005     static const hist_t hist_callers[] =\r
1006     {\r
1007         NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,\r
1008         0,\r
1009         NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,\r
1010         NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist\r
1011     };\r
1012 \r
1013     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1014 }\r
1015 \r
1016 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)\r
1017 {\r
1018     GpuMat buf;\r
1019     histRange(src, hist, levels, buf, stream);\r
1020 }\r
1021 \r
1022 \r
1023 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)\r
1024 {\r
1025     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);\r
1026 \r
1027     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream);\r
1028     static const hist_t hist_callers[] =\r
1029     {\r
1030         NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,\r
1031         0,\r
1032         NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,\r
1033         NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,\r
1034         0,\r
1035         NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist\r
1036     };\r
1037 \r
1038     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1039 }\r
1040 \r
1041 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)\r
1042 {\r
1043     GpuMat buf;\r
1044     histRange(src, hist, levels, buf, stream);\r
1045 }\r
1046 \r
1047 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream)\r
1048 {\r
1049     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);\r
1050 \r
1051     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream);\r
1052     static const hist_t hist_callers[] =\r
1053     {\r
1054         NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,\r
1055         0,\r
1056         NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,\r
1057         NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,\r
1058         0,\r
1059         NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist\r
1060     };\r
1061 \r
1062     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1063 }\r
1064 \r
1065 namespace cv { namespace gpu { namespace histograms\r
1066 {\r
1067     void histogram256_gpu(DevMem2D src, int* hist, unsigned int* buf, cudaStream_t stream);\r
1068 \r
1069     const int PARTIAL_HISTOGRAM256_COUNT = 240;\r
1070     const int HISTOGRAM256_BIN_COUNT     = 256;\r
1071 }}}\r
1072 \r
1073 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)\r
1074 {\r
1075     GpuMat buf;\r
1076     calcHist(src, hist, buf, stream);\r
1077 }\r
1078 \r
1079 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)\r
1080 {\r
1081     using namespace cv::gpu::histograms;\r
1082 \r
1083     CV_Assert(src.type() == CV_8UC1);\r
1084 \r
1085     hist.create(1, 256, CV_32SC1);\r
1086 \r
1087     ensureSizeIsEnough(1, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT, CV_32SC1, buf);\r
1088 \r
1089     histogram256_gpu(src, hist.ptr<int>(), buf.ptr<unsigned int>(), StreamAccessor::getStream(stream));\r
1090 }\r
1091 \r
1092 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1093 {\r
1094     GpuMat hist;\r
1095     GpuMat buf;\r
1096     equalizeHist(src, dst, hist, buf, stream);\r
1097 }\r
1098 \r
1099 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream)\r
1100 {\r
1101     GpuMat buf;\r
1102     equalizeHist(src, dst, hist, buf, stream);\r
1103 }\r
1104 \r
1105 namespace cv { namespace gpu { namespace histograms\r
1106 {\r
1107     void equalizeHist_gpu(DevMem2D src, DevMem2D dst, const int* lut, cudaStream_t stream);\r
1108 }}}\r
1109 \r
1110 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)\r
1111 {\r
1112     using namespace cv::gpu::histograms;\r
1113 \r
1114     CV_Assert(src.type() == CV_8UC1);\r
1115 \r
1116     dst.create(src.size(), src.type());\r
1117 \r
1118     int intBufSize;\r
1119     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );\r
1120 \r
1121     int bufSize = static_cast<int>(std::max(256 * 240 * sizeof(int), intBufSize + 256 * sizeof(int)));\r
1122 \r
1123     ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);\r
1124 \r
1125     GpuMat histBuf(1, 256 * 240, CV_32SC1, buf.ptr());\r
1126     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());\r
1127     GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);\r
1128 \r
1129     calcHist(src, hist, histBuf, s);\r
1130 \r
1131     cudaStream_t stream = StreamAccessor::getStream(s);\r
1132 \r
1133     NppStreamHandler h(stream);\r
1134 \r
1135     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );\r
1136     \r
1137     if (stream == 0)\r
1138         cudaSafeCall( cudaDeviceSynchronize() );\r
1139 \r
1140     equalizeHist_gpu(src, dst, lut.ptr<int>(), stream);\r
1141 }\r
1142 \r
1143 ////////////////////////////////////////////////////////////////////////\r
1144 // cornerHarris & minEgenVal\r
1145 \r
1146 namespace cv { namespace gpu { namespace imgproc {\r
1147 \r
1148     void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);\r
1149     void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1150     void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1151 \r
1152 }}}\r
1153 \r
1154 namespace \r
1155 {\r
1156     template <typename T>\r
1157     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1158     {   \r
1159         double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;\r
1160         if (ksize < 0) \r
1161             scale *= 2.;\r
1162         if (src.depth() == CV_8U)\r
1163             scale *= 255.;\r
1164         scale = 1./scale;\r
1165 \r
1166         Dx.create(src.size(), CV_32F);\r
1167         Dy.create(src.size(), CV_32F);\r
1168 \r
1169         if (ksize > 0)\r
1170         {\r
1171             Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);\r
1172             Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);\r
1173         }\r
1174         else\r
1175         {\r
1176             Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);\r
1177             Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);\r
1178         }\r
1179     }\r
1180 \r
1181     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1182     {\r
1183         switch (src.type())\r
1184         {\r
1185         case CV_8U:\r
1186             extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);\r
1187             break;\r
1188         case CV_32F:\r
1189             extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);\r
1190             break;\r
1191         default:\r
1192             CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");\r
1193         }\r
1194     }\r
1195 \r
1196 } // Anonymous namespace\r
1197 \r
1198 \r
1199 bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)\r
1200 {\r
1201     if (cpuBorderType == cv::BORDER_REFLECT101)\r
1202     {\r
1203         gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;\r
1204         return true;\r
1205     }\r
1206 \r
1207     if (cpuBorderType == cv::BORDER_REPLICATE)\r
1208     {\r
1209         gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
1210         return true;\r
1211     }\r
1212     \r
1213     if (cpuBorderType == cv::BORDER_CONSTANT)\r
1214     {\r
1215         gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
1216         return true;\r
1217     }\r
1218 \r
1219     return false;\r
1220 }\r
1221 \r
1222 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)\r
1223 {\r
1224     GpuMat Dx, Dy;\r
1225     cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);\r
1226 }\r
1227 \r
1228 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)\r
1229 {\r
1230     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1231               borderType == cv::BORDER_REPLICATE);\r
1232 \r
1233     int gpuBorderType;\r
1234     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1235 \r
1236     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);\r
1237     dst.create(src.size(), CV_32F);\r
1238     imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);\r
1239 }\r
1240 \r
1241 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)\r
1242 {  \r
1243     GpuMat Dx, Dy;\r
1244     cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);\r
1245 }\r
1246 \r
1247 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1248 {  \r
1249     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1250               borderType == cv::BORDER_REPLICATE);\r
1251 \r
1252     int gpuBorderType;\r
1253     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1254 \r
1255     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);    \r
1256     dst.create(src.size(), CV_32F);\r
1257     imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);\r
1258 }\r
1259 \r
1260 //////////////////////////////////////////////////////////////////////////////\r
1261 // mulSpectrums\r
1262 \r
1263 namespace cv { namespace gpu { namespace imgproc \r
1264 {\r
1265     void mulSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1266                       DevMem2D_<cufftComplex> c);\r
1267 \r
1268     void mulSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1269                            DevMem2D_<cufftComplex> c);\r
1270 }}}\r
1271 \r
1272 \r
1273 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, \r
1274                            int flags, bool conjB) \r
1275 {\r
1276     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>, \r
1277                            DevMem2D_<cufftComplex>);\r
1278     static Caller callers[] = { imgproc::mulSpectrums, \r
1279                                 imgproc::mulSpectrums_CONJ };\r
1280 \r
1281     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1282     CV_Assert(a.size() == b.size());\r
1283 \r
1284     c.create(a.size(), CV_32FC2);\r
1285 \r
1286     Caller caller = callers[(int)conjB];\r
1287     caller(a, b, c);\r
1288 }\r
1289 \r
1290 //////////////////////////////////////////////////////////////////////////////\r
1291 // mulAndScaleSpectrums\r
1292 \r
1293 namespace cv { namespace gpu { namespace imgproc \r
1294 {\r
1295     void mulAndScaleSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1296                              float scale, DevMem2D_<cufftComplex> c);\r
1297 \r
1298     void mulAndScaleSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1299                                   float scale, DevMem2D_<cufftComplex> c);\r
1300 }}}\r
1301 \r
1302 \r
1303 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,\r
1304                                   int flags, float scale, bool conjB) \r
1305 {\r
1306     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>,\r
1307                            float scale, DevMem2D_<cufftComplex>);\r
1308     static Caller callers[] = { imgproc::mulAndScaleSpectrums, \r
1309                                 imgproc::mulAndScaleSpectrums_CONJ };\r
1310 \r
1311     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1312     CV_Assert(a.size() == b.size());\r
1313 \r
1314     c.create(a.size(), CV_32FC2);\r
1315 \r
1316     Caller caller = callers[(int)conjB];\r
1317     caller(a, b, scale, c);\r
1318 }\r
1319 \r
1320 //////////////////////////////////////////////////////////////////////////////\r
1321 // dft\r
1322 \r
1323 void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)\r
1324 {\r
1325     CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);\r
1326 \r
1327     // We don't support unpacked output (in the case of real input)\r
1328     CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));\r
1329 \r
1330     bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);\r
1331     int is_row_dft = flags & DFT_ROWS;\r
1332     int is_scaled_dft = flags & DFT_SCALE;\r
1333     int is_inverse = flags & DFT_INVERSE;\r
1334     bool is_complex_input = src.channels() == 2;\r
1335     bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
1336 \r
1337     // We don't support real-to-real transform\r
1338     CV_Assert(is_complex_input || is_complex_output);\r
1339 \r
1340     GpuMat src_data;\r
1341 \r
1342     // Make sure here we work with the continuous input, \r
1343     // as CUFFT can't handle gaps\r
1344     src_data = src;\r
1345     createContinuous(src.rows, src.cols, src.type(), src_data);\r
1346     if (src_data.data != src.data)\r
1347         src.copyTo(src_data);\r
1348 \r
1349     Size dft_size_opt = dft_size;\r
1350     if (is_1d_input && !is_row_dft)\r
1351     {\r
1352         // If the source matrix is single column handle it as single row\r
1353         dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
1354         dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
1355     }\r
1356 \r
1357     cufftType dft_type = CUFFT_R2C;\r
1358     if (is_complex_input) \r
1359         dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
1360 \r
1361     CV_Assert(dft_size_opt.width > 1);\r
1362 \r
1363     cufftHandle plan;\r
1364     if (is_1d_input || is_row_dft)\r
1365         cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
1366     else\r
1367         cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
1368 \r
1369     if (is_complex_input)\r
1370     {\r
1371         if (is_complex_output)\r
1372         {\r
1373             createContinuous(dft_size, CV_32FC2, dst);\r
1374             cufftSafeCall(cufftExecC2C(\r
1375                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),\r
1376                     is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
1377         }\r
1378         else\r
1379         {\r
1380             createContinuous(dft_size, CV_32F, dst);\r
1381             cufftSafeCall(cufftExecC2R(\r
1382                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));\r
1383         }\r
1384     }\r
1385     else\r
1386     {\r
1387         // We could swap dft_size for efficiency. Here we must reflect it\r
1388         if (dft_size == dft_size_opt)\r
1389             createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
1390         else\r
1391             createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
1392 \r
1393         cufftSafeCall(cufftExecR2C(\r
1394                 plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));\r
1395     }\r
1396 \r
1397     cufftSafeCall(cufftDestroy(plan));\r
1398 \r
1399     if (is_scaled_dft)\r
1400         multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
1401 }\r
1402 \r
1403 //////////////////////////////////////////////////////////////////////////////\r
1404 // convolve\r
1405 \r
1406 \r
1407 void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)\r
1408 {\r
1409     result_size = Size(image_size.width - templ_size.width + 1,\r
1410                        image_size.height - templ_size.height + 1);\r
1411     block_size = estimateBlockSize(result_size, templ_size);\r
1412 \r
1413     dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);\r
1414     dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1);\r
1415     createContinuous(dft_size, CV_32F, image_block);\r
1416     createContinuous(dft_size, CV_32F, templ_block);\r
1417     createContinuous(dft_size, CV_32F, result_data);\r
1418 \r
1419     spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
1420     createContinuous(1, spect_len, CV_32FC2, image_spect);\r
1421     createContinuous(1, spect_len, CV_32FC2, templ_spect);\r
1422     createContinuous(1, spect_len, CV_32FC2, result_spect);\r
1423 \r
1424     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);\r
1425     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);\r
1426 }\r
1427 \r
1428 \r
1429 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
1430 {\r
1431     int scale = 40;\r
1432     Size bsize_min(1024, 1024);\r
1433 \r
1434     // Check whether we use Fermi generation or newer GPU\r
1435     if (DeviceInfo().majorVersion() >= 2)\r
1436     {\r
1437         bsize_min.width = 2048;\r
1438         bsize_min.height = 2048;\r
1439     }\r
1440 \r
1441     Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
1442                std::max(templ_size.height * scale, bsize_min.height));\r
1443 \r
1444     bsize.width = std::min(bsize.width, result_size.width);\r
1445     bsize.height = std::min(bsize.height, result_size.height);\r
1446     return bsize;\r
1447 }\r
1448 \r
1449 \r
1450 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1451                        bool ccorr)\r
1452 {\r
1453     ConvolveBuf buf;\r
1454     convolve(image, templ, result, ccorr, buf);\r
1455 }\r
1456 \r
1457 \r
1458 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1459                        bool ccorr, ConvolveBuf& buf)\r
1460 {\r
1461     StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();\r
1462     StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();\r
1463 \r
1464     CV_Assert(image.type() == CV_32F);\r
1465     CV_Assert(templ.type() == CV_32F);\r
1466 \r
1467     buf.create(image.size(), templ.size());\r
1468     result.create(buf.result_size, CV_32F);\r
1469 \r
1470     Size& block_size = buf.block_size;\r
1471     Size& dft_size = buf.dft_size;\r
1472 \r
1473     GpuMat& image_block = buf.image_block;\r
1474     GpuMat& templ_block = buf.templ_block;\r
1475     GpuMat& result_data = buf.result_data;\r
1476 \r
1477     GpuMat& image_spect = buf.image_spect;\r
1478     GpuMat& templ_spect = buf.templ_spect;\r
1479     GpuMat& result_spect = buf.result_spect;\r
1480 \r
1481     cufftHandle planR2C, planC2R;\r
1482     cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
1483     cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
1484 \r
1485     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
1486     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
1487                    templ_block.cols - templ_roi.cols, 0);\r
1488 \r
1489     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
1490                                templ_spect.ptr<cufftComplex>()));\r
1491 \r
1492     // Process all blocks of the result matrix\r
1493     for (int y = 0; y < result.rows; y += block_size.height)\r
1494     {\r
1495         for (int x = 0; x < result.cols; x += block_size.width)\r
1496         {\r
1497             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
1498                                 std::min(y + dft_size.height, image.rows) - y);\r
1499             GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
1500                              image.step);\r
1501             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
1502                            0, image_block.cols - image_roi.cols, 0);\r
1503 \r
1504             cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
1505                                        image_spect.ptr<cufftComplex>()));\r
1506             mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
1507                                  1.f / dft_size.area(), ccorr);\r
1508             cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
1509                                        result_data.ptr<cufftReal>()));\r
1510 \r
1511             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
1512                                  std::min(y + block_size.height, result.rows) - y);\r
1513             GpuMat result_roi(result_roi_size, result.type(), \r
1514                               (void*)(result.ptr<float>(y) + x), result.step);\r
1515             GpuMat result_block(result_roi_size, result_data.type(), \r
1516                                 result_data.ptr(), result_data.step);\r
1517             result_block.copyTo(result_roi);\r
1518         }\r
1519     }\r
1520 \r
1521     cufftSafeCall(cufftDestroy(planR2C));\r
1522     cufftSafeCall(cufftDestroy(planC2R));\r
1523 }\r
1524 \r
1525 \r
1526 ////////////////////////////////////////////////////////////////////\r
1527 // downsample\r
1528 \r
1529 namespace cv { namespace gpu { namespace imgproc\r
1530 {\r
1531     template <typename T, int cn>\r
1532     void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1533 }}}\r
1534 \r
1535 \r
1536 void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1537 {\r
1538     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1539 \r
1540     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1541     static const Caller callers[6][4] =\r
1542         {{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,\r
1543           imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},\r
1544          {0,0,0,0}, {0,0,0,0},\r
1545          {imgproc::downsampleCaller<short,1>, imgproc::downsampleCaller<short,2>,\r
1546           imgproc::downsampleCaller<short,3>, imgproc::downsampleCaller<short,4>},\r
1547          {0,0,0,0},\r
1548          {imgproc::downsampleCaller<float,1>, imgproc::downsampleCaller<float,2>,\r
1549           imgproc::downsampleCaller<float,3>, imgproc::downsampleCaller<float,4>}};\r
1550 \r
1551     Caller caller = callers[src.depth()][src.channels()-1];\r
1552     if (!caller)\r
1553         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1554 \r
1555     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1556     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1557 }\r
1558 \r
1559 \r
1560 //////////////////////////////////////////////////////////////////////////////\r
1561 // upsample\r
1562 \r
1563 namespace cv { namespace gpu { namespace imgproc\r
1564 {\r
1565     template <typename T, int cn>\r
1566     void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1567 }}}\r
1568 \r
1569 \r
1570 void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1571 {\r
1572     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1573 \r
1574     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1575     static const Caller callers[6][5] =\r
1576         {{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,\r
1577           imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},\r
1578          {0,0,0,0}, {0,0,0,0},\r
1579          {imgproc::upsampleCaller<short,1>, imgproc::upsampleCaller<short,2>,\r
1580           imgproc::upsampleCaller<short,3>, imgproc::upsampleCaller<short,4>},\r
1581          {0,0,0,0},\r
1582          {imgproc::upsampleCaller<float,1>, imgproc::upsampleCaller<float,2>,\r
1583           imgproc::upsampleCaller<float,3>, imgproc::upsampleCaller<float,4>}};\r
1584 \r
1585     Caller caller = callers[src.depth()][src.channels()-1];\r
1586     if (!caller)\r
1587         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1588 \r
1589     dst.create(src.rows*2, src.cols*2, src.type());\r
1590     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1591 }\r
1592 \r
1593 \r
1594 //////////////////////////////////////////////////////////////////////////////\r
1595 // pyrDown\r
1596 \r
1597 namespace cv { namespace gpu { namespace imgproc\r
1598 {\r
1599     template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
1600 }}}\r
1601 \r
1602 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1603 {\r
1604     using namespace cv::gpu::imgproc;\r
1605 \r
1606     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
1607 \r
1608     static const func_t funcs[6][4] = \r
1609     {\r
1610         {pyrDown_gpu<uchar, 1>, pyrDown_gpu<uchar, 2>, pyrDown_gpu<uchar, 3>, pyrDown_gpu<uchar, 4>},\r
1611         {pyrDown_gpu<schar, 1>, pyrDown_gpu<schar, 2>, pyrDown_gpu<schar, 3>, pyrDown_gpu<schar, 4>},\r
1612         {pyrDown_gpu<ushort, 1>, pyrDown_gpu<ushort, 2>, pyrDown_gpu<ushort, 3>, pyrDown_gpu<ushort, 4>},\r
1613         {pyrDown_gpu<short, 1>, pyrDown_gpu<short, 2>, pyrDown_gpu<short, 3>, pyrDown_gpu<short, 4>},\r
1614         {pyrDown_gpu<int, 1>, pyrDown_gpu<int, 2>, pyrDown_gpu<int, 3>, pyrDown_gpu<int, 4>},\r
1615         {pyrDown_gpu<float, 1>, pyrDown_gpu<float, 2>, pyrDown_gpu<float, 3>, pyrDown_gpu<float, 4>},\r
1616     };\r
1617 \r
1618     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1619 \r
1620     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1621 \r
1622     funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));\r
1623 }\r
1624 \r
1625 \r
1626 //////////////////////////////////////////////////////////////////////////////\r
1627 // pyrUp\r
1628 \r
1629 namespace cv { namespace gpu { namespace imgproc\r
1630 {\r
1631     template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
1632 }}}\r
1633 \r
1634 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1635 {\r
1636     using namespace cv::gpu::imgproc;\r
1637 \r
1638     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
1639 \r
1640     static const func_t funcs[6][4] = \r
1641     {\r
1642         {pyrUp_gpu<uchar, 1>, pyrUp_gpu<uchar, 2>, pyrUp_gpu<uchar, 3>, pyrUp_gpu<uchar, 4>},\r
1643         {pyrUp_gpu<schar, 1>, pyrUp_gpu<schar, 2>, pyrUp_gpu<schar, 3>, pyrUp_gpu<schar, 4>},\r
1644         {pyrUp_gpu<ushort, 1>, pyrUp_gpu<ushort, 2>, pyrUp_gpu<ushort, 3>, pyrUp_gpu<ushort, 4>},\r
1645         {pyrUp_gpu<short, 1>, pyrUp_gpu<short, 2>, pyrUp_gpu<short, 3>, pyrUp_gpu<short, 4>},\r
1646         {pyrUp_gpu<int, 1>, pyrUp_gpu<int, 2>, pyrUp_gpu<int, 3>, pyrUp_gpu<int, 4>},\r
1647         {pyrUp_gpu<float, 1>, pyrUp_gpu<float, 2>, pyrUp_gpu<float, 3>, pyrUp_gpu<float, 4>},\r
1648     };\r
1649 \r
1650     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1651 \r
1652     dst.create(src.rows*2, src.cols*2, src.type());\r
1653 \r
1654     funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));\r
1655 }\r
1656 \r
1657 \r
1658 //////////////////////////////////////////////////////////////////////////////\r
1659 // Canny\r
1660 \r
1661 cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)\r
1662 {\r
1663     CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());\r
1664 \r
1665     create(dx_.size(), -1);\r
1666 }\r
1667 \r
1668 void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)\r
1669 {\r
1670     ensureSizeIsEnough(image_size, CV_32SC1, dx);\r
1671     ensureSizeIsEnough(image_size, CV_32SC1, dy);\r
1672 \r
1673     if (apperture_size == 3)\r
1674     {\r
1675         ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);\r
1676         ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);\r
1677     }\r
1678     else if(apperture_size > 0)\r
1679     {\r
1680         if (!filterDX)\r
1681             filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);\r
1682         if (!filterDY)\r
1683             filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);\r
1684     }\r
1685 \r
1686     ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);\r
1687 \r
1688     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);\r
1689     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);\r
1690 }\r
1691 \r
1692 void cv::gpu::CannyBuf::release()\r
1693 {\r
1694     dx.release();\r
1695     dy.release();\r
1696     dx_buf.release();\r
1697     dy_buf.release();\r
1698     edgeBuf.release();\r
1699     trackBuf1.release();\r
1700     trackBuf2.release();\r
1701 }\r
1702 \r
1703 namespace cv { namespace gpu { namespace canny\r
1704 {    \r
1705     void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);\r
1706 \r
1707     void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1708     void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1709 \r
1710     void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);\r
1711     \r
1712     void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);\r
1713 \r
1714     void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);\r
1715 \r
1716     void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);\r
1717 }}}\r
1718 \r
1719 namespace\r
1720 {\r
1721     void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)\r
1722     {\r
1723         using namespace cv::gpu::canny;\r
1724 \r
1725         calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);\r
1726         \r
1727         edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);\r
1728         \r
1729         edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);\r
1730         \r
1731         getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);\r
1732     }\r
1733 }\r
1734 \r
1735 void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1736 {\r
1737     CannyBuf buf(src.size(), apperture_size);\r
1738     Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);\r
1739 }\r
1740 \r
1741 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1742 {\r
1743     using namespace cv::gpu::canny;\r
1744 \r
1745     CV_Assert(src.type() == CV_8UC1);\r
1746 \r
1747     if( low_thresh > high_thresh )\r
1748         std::swap( low_thresh, high_thresh);\r
1749 \r
1750     dst.create(src.size(), CV_8U);\r
1751     dst.setTo(Scalar::all(0));\r
1752     \r
1753     buf.create(src.size(), apperture_size);\r
1754     buf.edgeBuf.setTo(Scalar::all(0));\r
1755 \r
1756     if (apperture_size == 3)\r
1757     {\r
1758         calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);\r
1759 \r
1760         calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1761     }\r
1762     else\r
1763     {\r
1764         buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));\r
1765         buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));\r
1766 \r
1767         calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1768     }\r
1769 \r
1770     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1771 }\r
1772 \r
1773 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1774 {\r
1775     CannyBuf buf(dx, dy);\r
1776     Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);\r
1777 }\r
1778 \r
1779 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1780 {\r
1781     using namespace cv::gpu::canny;\r
1782 \r
1783     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());\r
1784 \r
1785     if( low_thresh > high_thresh )\r
1786         std::swap( low_thresh, high_thresh);\r
1787 \r
1788     dst.create(dx.size(), CV_8U);\r
1789     dst.setTo(Scalar::all(0));\r
1790     \r
1791     buf.dx = dx; buf.dy = dy;\r
1792     buf.create(dx.size(), -1);\r
1793     buf.edgeBuf.setTo(Scalar::all(0));\r
1794 \r
1795     calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);\r
1796 \r
1797     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1798 }\r
1799 \r
1800 #endif /* !defined (HAVE_CUDA) */\r
1801 \r
1802 \r