modules/gpu/src/imgproc.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "precomp.hpp"\r
  44 \r
  45 using namespace cv;\r
  46 using namespace cv::gpu;\r
  47 \r
  48 #if !defined (HAVE_CUDA)\r
  49 \r
  50 void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&){ throw_nogpu(); }\r
  51 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  52 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  53 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  54 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  55 void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
  56 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  57 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  58 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  59 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, double, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  60 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  61 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  62 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
  63 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  64 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  65 void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  66 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  67 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
  68 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }\r
  69 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_nogpu(); }\r
  70 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  71 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  72 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  73 void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  74 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }\r
  75 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  76 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }\r
  77 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_nogpu(); }\r
  78 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  79 void cv::gpu::calcHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  80 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  81 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  82 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  83 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  84 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  85 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  86 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  87 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }\r
  88 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }\r
  89 void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }\r
  90 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
  91 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
  92 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
  93 void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  94 void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  95 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  96 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  97 void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  98 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  99 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 100 void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
 101 cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }\r
 102 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }\r
 103 void cv::gpu::CannyBuf::release() { throw_nogpu(); }\r
 104 \r
 105 #else /* !defined (HAVE_CUDA) */\r
 106 \r
 107 ////////////////////////////////////////////////////////////////////////\r
 108 // remap\r
 109 \r
 110 namespace cv { namespace gpu {  namespace imgproc\r
 111 {\r
 112     template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
 113                                          int interpolation, int borderMode, const double borderValue[4]);\r
 114 }}}\r
 115 \r
 116 void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue)\r
 117 {\r
 118     using namespace cv::gpu::imgproc;\r
 119 \r
 120     typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);;\r
 121     static const caller_t callers[6][4] = \r
 122     {\r
 123         {remap_gpu<uchar>, remap_gpu<uchar2>, remap_gpu<uchar3>, remap_gpu<uchar4>},\r
 124         {remap_gpu<schar>, remap_gpu<char2>, remap_gpu<char3>, remap_gpu<char4>},\r
 125         {remap_gpu<ushort>, remap_gpu<ushort2>, remap_gpu<ushort3>, remap_gpu<ushort4>},\r
 126         {remap_gpu<short>, remap_gpu<short2>, remap_gpu<short3>, remap_gpu<short4>},\r
 127         {remap_gpu<int>, remap_gpu<int2>, remap_gpu<int3>, remap_gpu<int4>},\r
 128         {remap_gpu<float>, remap_gpu<float2>, remap_gpu<float3>, remap_gpu<float4>}\r
 129     };\r
 130 \r
 131     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
 132     CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());\r
 133 \r
 134     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);\r
 135 \r
 136     CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);\r
 137     int gpuBorderType;\r
 138     CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));\r
 139 \r
 140     dst.create(xmap.size(), src.type());\r
 141 \r
 142     callers[src.depth()][src.channels() - 1](src, xmap, ymap, dst, interpolation, gpuBorderType, borderValue.val);\r
 143 }\r
 144 \r
 145 ////////////////////////////////////////////////////////////////////////\r
 146 // meanShiftFiltering_GPU\r
 147 \r
 148 namespace cv { namespace gpu {  namespace imgproc\r
 149 {\r
 150     extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
 151 }}}\r
 152 \r
 153 void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)\r
 154 {\r
 155     if( src.empty() )\r
 156         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 157 \r
 158     if( src.depth() != CV_8U || src.channels() != 4 )\r
 159         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 160 \r
 161     dst.create( src.size(), CV_8UC4 );\r
 162 \r
 163     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 164         criteria.maxCount = 5;\r
 165 \r
 166     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 167 \r
 168     float eps;\r
 169     if( !(criteria.type & TermCriteria::EPS) )\r
 170         eps = 1.f;\r
 171     eps = (float)std::max(criteria.epsilon, 0.0);\r
 172 \r
 173     imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);\r
 174 }\r
 175 \r
 176 ////////////////////////////////////////////////////////////////////////\r
 177 // meanShiftProc_GPU\r
 178 \r
 179 namespace cv { namespace gpu {  namespace imgproc\r
 180 {\r
 181     extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
 182 }}}\r
 183 \r
 184 void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)\r
 185 {\r
 186     if( src.empty() )\r
 187         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 188 \r
 189     if( src.depth() != CV_8U || src.channels() != 4 )\r
 190         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 191 \r
 192     dstr.create( src.size(), CV_8UC4 );\r
 193     dstsp.create( src.size(), CV_16SC2 );\r
 194 \r
 195     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 196         criteria.maxCount = 5;\r
 197 \r
 198     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 199 \r
 200     float eps;\r
 201     if( !(criteria.type & TermCriteria::EPS) )\r
 202         eps = 1.f;\r
 203     eps = (float)std::max(criteria.epsilon, 0.0);\r
 204 \r
 205     imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);\r
 206 }\r
 207 \r
 208 ////////////////////////////////////////////////////////////////////////\r
 209 // drawColorDisp\r
 210 \r
 211 namespace cv { namespace gpu {  namespace imgproc\r
 212 {\r
 213     void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 214     void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 215 }}}\r
 216 \r
 217 namespace\r
 218 {\r
 219     template <typename T>\r
 220     void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)\r
 221     {\r
 222         dst.create(src.size(), CV_8UC4);\r
 223 \r
 224         imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);\r
 225     }\r
 226 \r
 227     typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);\r
 228 \r
 229     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};\r
 230 }\r
 231 \r
 232 void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)\r
 233 {\r
 234     CV_Assert(src.type() == CV_8U || src.type() == CV_16S);\r
 235 \r
 236     drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));\r
 237 }\r
 238 \r
 239 ////////////////////////////////////////////////////////////////////////\r
 240 // reprojectImageTo3D\r
 241 \r
 242 namespace cv { namespace gpu {  namespace imgproc\r
 243 {\r
 244     void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 245     void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 246 }}}\r
 247 \r
 248 namespace\r
 249 {\r
 250     template <typename T>\r
 251     void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)\r
 252     {\r
 253         xyzw.create(disp.rows, disp.cols, CV_32FC4);\r
 254         imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
 255     }\r
 256 \r
 257     typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);\r
 258 \r
 259     const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};\r
 260 }\r
 261 \r
 262 void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream)\r
 263 {\r
 264     CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);\r
 265 \r
 266     reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));\r
 267 }\r
 268 \r
 269 ////////////////////////////////////////////////////////////////////////\r
 270 // resize\r
 271 \r
 272 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
 273 {\r
 274     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
 275 \r
 276     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 277     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
 278 \r
 279     CV_Assert( src.size().area() > 0 );\r
 280     CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
 281 \r
 282     if( dsize == Size() )\r
 283     {\r
 284         dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));\r
 285     }\r
 286     else\r
 287     {\r
 288         fx = (double)dsize.width / src.cols;\r
 289         fy = (double)dsize.height / src.rows;\r
 290     }\r
 291 \r
 292     dst.create(dsize, src.type());\r
 293 \r
 294     NppiSize srcsz;\r
 295     srcsz.width  = src.cols;\r
 296     srcsz.height = src.rows;\r
 297     NppiRect srcrect;\r
 298     srcrect.x = srcrect.y = 0;\r
 299     srcrect.width  = src.cols;\r
 300     srcrect.height = src.rows;\r
 301     NppiSize dstsz;\r
 302     dstsz.width  = dst.cols;\r
 303     dstsz.height = dst.rows;\r
 304 \r
 305     cudaStream_t stream = StreamAccessor::getStream(s);\r
 306 \r
 307     NppStreamHandler h(stream);\r
 308 \r
 309     if (src.type() == CV_8UC1)\r
 310     {\r
 311         nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 312             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 313     }\r
 314     else\r
 315     {\r
 316         nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 317             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 318     }\r
 319 \r
 320     if (stream == 0)\r
 321         cudaSafeCall( cudaDeviceSynchronize() );\r
 322 }\r
 323 \r
 324 ////////////////////////////////////////////////////////////////////////\r
 325 // copyMakeBorder\r
 326 \r
 327 void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)\r
 328 {\r
 329     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
 330 \r
 331     dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
 332 \r
 333     NppiSize srcsz;\r
 334     srcsz.width  = src.cols;\r
 335     srcsz.height = src.rows;\r
 336     NppiSize dstsz;\r
 337     dstsz.width  = dst.cols;\r
 338     dstsz.height = dst.rows;\r
 339 \r
 340     cudaStream_t stream = StreamAccessor::getStream(s);\r
 341 \r
 342     NppStreamHandler h(stream);\r
 343 \r
 344     switch (src.type())\r
 345     {\r
 346     case CV_8UC1:\r
 347         {\r
 348             Npp8u nVal = static_cast<Npp8u>(value[0]);\r
 349             nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 350                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 351             break;\r
 352         }\r
 353     case CV_8UC4:\r
 354         {\r
 355             Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};\r
 356             nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 357                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 358             break;\r
 359         }\r
 360     case CV_32SC1:\r
 361         {\r
 362             Npp32s nVal = static_cast<Npp32s>(value[0]);\r
 363             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 364                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 365             break;\r
 366         }\r
 367     case CV_32FC1:\r
 368         {\r
 369             Npp32f val = static_cast<Npp32f>(value[0]);\r
 370             Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
 371             nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 372                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 373             break;\r
 374         }\r
 375     default:\r
 376         CV_Assert(!"Unsupported source type");\r
 377     }\r
 378 \r
 379     if (stream == 0)\r
 380         cudaSafeCall( cudaDeviceSynchronize() );\r
 381 }\r
 382 \r
 383 ////////////////////////////////////////////////////////////////////////\r
 384 // warp\r
 385 \r
 386 namespace\r
 387 {\r
 388     typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,\r
 389                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 390                                        int interpolation);\r
 391     typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,\r
 392                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 393                                        int interpolation);\r
 394     typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,\r
 395                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 396                                        int interpolation);\r
 397     typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,\r
 398                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 399                                        int interpolation);\r
 400 \r
 401     void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,\r
 402                        npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],\r
 403                        npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)\r
 404     {\r
 405         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 406 \r
 407         int interpolation = flags & INTER_MAX;\r
 408 \r
 409         CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);\r
 410         CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 411 \r
 412         dst.create(dsize, src.type());\r
 413 \r
 414         NppiSize srcsz;\r
 415         srcsz.height = src.rows;\r
 416         srcsz.width = src.cols;\r
 417         NppiRect srcroi;\r
 418         srcroi.x = srcroi.y = 0;\r
 419         srcroi.height = src.rows;\r
 420         srcroi.width = src.cols;\r
 421         NppiRect dstroi;\r
 422         dstroi.x = dstroi.y = 0;\r
 423         dstroi.height = dst.rows;\r
 424         dstroi.width = dst.cols;\r
 425 \r
 426         int warpInd = (flags & WARP_INVERSE_MAP) >> 4;\r
 427 \r
 428         NppStreamHandler h(stream);\r
 429 \r
 430         switch (src.depth())\r
 431         {\r
 432         case CV_8U:\r
 433             nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 434                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 435             break;\r
 436         case CV_16U:\r
 437             nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 438                 dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 439             break;\r
 440         case CV_32S:\r
 441             nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, static_cast<int>(src.step), srcroi,\r
 442                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 443             break;\r
 444         case CV_32F:\r
 445             nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, static_cast<int>(src.step), srcroi,\r
 446                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 447             break;\r
 448         default:\r
 449             CV_Assert(!"Unsupported source type");\r
 450         }\r
 451 \r
 452         if (stream == 0)\r
 453             cudaSafeCall( cudaDeviceSynchronize() );\r
 454     }\r
 455 }\r
 456 \r
 457 void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 458 {\r
 459     static npp_warp_8u_t npp_warpAffine_8u[][2] =\r
 460         {\r
 461             {0, 0},\r
 462             {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},\r
 463             {0, 0},\r
 464             {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},\r
 465             {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}\r
 466         };\r
 467     static npp_warp_16u_t npp_warpAffine_16u[][2] =\r
 468         {\r
 469             {0, 0},\r
 470             {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},\r
 471             {0, 0},\r
 472             {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},\r
 473             {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}\r
 474         };\r
 475     static npp_warp_32s_t npp_warpAffine_32s[][2] =\r
 476         {\r
 477             {0, 0},\r
 478             {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},\r
 479             {0, 0},\r
 480             {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},\r
 481             {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}\r
 482         };\r
 483     static npp_warp_32f_t npp_warpAffine_32f[][2] =\r
 484         {\r
 485             {0, 0},\r
 486             {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},\r
 487             {0, 0},\r
 488             {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},\r
 489             {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}\r
 490         };\r
 491 \r
 492     CV_Assert(M.rows == 2 && M.cols == 3);\r
 493 \r
 494     double coeffs[2][3];\r
 495     Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);\r
 496     M.convertTo(coeffsMat, coeffsMat.type());\r
 497 \r
 498     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));\r
 499 }\r
 500 \r
 501 void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 502 {\r
 503     static npp_warp_8u_t npp_warpPerspective_8u[][2] =\r
 504         {\r
 505             {0, 0},\r
 506             {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},\r
 507             {0, 0},\r
 508             {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},\r
 509             {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}\r
 510         };\r
 511     static npp_warp_16u_t npp_warpPerspective_16u[][2] =\r
 512         {\r
 513             {0, 0},\r
 514             {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},\r
 515             {0, 0},\r
 516             {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},\r
 517             {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}\r
 518         };\r
 519     static npp_warp_32s_t npp_warpPerspective_32s[][2] =\r
 520         {\r
 521             {0, 0},\r
 522             {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},\r
 523             {0, 0},\r
 524             {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},\r
 525             {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}\r
 526         };\r
 527     static npp_warp_32f_t npp_warpPerspective_32f[][2] =\r
 528         {\r
 529             {0, 0},\r
 530             {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},\r
 531             {0, 0},\r
 532             {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},\r
 533             {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}\r
 534         };\r
 535 \r
 536     CV_Assert(M.rows == 3 && M.cols == 3);\r
 537 \r
 538     double coeffs[3][3];\r
 539     Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);\r
 540     M.convertTo(coeffsMat, coeffsMat.type());\r
 541 \r
 542     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));\r
 543 }\r
 544 \r
 545 //////////////////////////////////////////////////////////////////////////////\r
 546 // buildWarpPlaneMaps\r
 547 \r
 548 namespace cv { namespace gpu { namespace imgproc\r
 549 {\r
 550     void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 551                             const float r[9], const float rinv[9], float f, float s, float dist,\r
 552                             float half_w, float half_h, cudaStream_t stream);\r
 553 }}}\r
 554 \r
 555 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 556                                  double dist, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 557 {\r
 558     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 559     Mat Rinv = R.inv();\r
 560     CV_Assert(Rinv.isContinuous());\r
 561 \r
 562     map_x.create(dst_roi.size(), CV_32F);\r
 563     map_y.create(dst_roi.size(), CV_32F);\r
 564     imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 565                                 static_cast<float>(f), static_cast<float>(s), static_cast<float>(dist), \r
 566                                 0.5f*src_size.width, 0.5f*src_size.height, StreamAccessor::getStream(stream));\r
 567 }\r
 568 \r
 569 //////////////////////////////////////////////////////////////////////////////\r
 570 // buildWarpCylyndricalMaps\r
 571 \r
 572 namespace cv { namespace gpu { namespace imgproc\r
 573 {\r
 574     void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 575                                   const float r[9], const float rinv[9], float f, float s,\r
 576                                   float half_w, float half_h, cudaStream_t stream);\r
 577 }}}\r
 578 \r
 579 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 580                                        GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 581 {\r
 582     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 583     Mat Rinv = R.inv();\r
 584     CV_Assert(Rinv.isContinuous());\r
 585 \r
 586     map_x.create(dst_roi.size(), CV_32F);\r
 587     map_y.create(dst_roi.size(), CV_32F);\r
 588     imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 589                                       static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 590                                       StreamAccessor::getStream(stream));\r
 591 }\r
 592 \r
 593 \r
 594 //////////////////////////////////////////////////////////////////////////////\r
 595 // buildWarpSphericalMaps\r
 596 \r
 597 namespace cv { namespace gpu { namespace imgproc\r
 598 {\r
 599     void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 600                                 const float r[9], const float rinv[9], float f, float s,\r
 601                                 float half_w, float half_h, cudaStream_t stream);\r
 602 }}}\r
 603 \r
 604 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
 605                                      GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 606 {\r
 607     CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
 608     Mat Rinv = R.inv();\r
 609     CV_Assert(Rinv.isContinuous());\r
 610 \r
 611     map_x.create(dst_roi.size(), CV_32F);\r
 612     map_y.create(dst_roi.size(), CV_32F);\r
 613     imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
 614                                     static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
 615                                     StreamAccessor::getStream(stream));\r
 616 }\r
 617 \r
 618 ////////////////////////////////////////////////////////////////////////\r
 619 // rotate\r
 620 \r
 621 void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)\r
 622 {\r
 623     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 624 \r
 625     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 626     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 627 \r
 628     dst.create(dsize, src.type());\r
 629 \r
 630     NppiSize srcsz;\r
 631     srcsz.height = src.rows;\r
 632     srcsz.width = src.cols;\r
 633     NppiRect srcroi;\r
 634     srcroi.x = srcroi.y = 0;\r
 635     srcroi.height = src.rows;\r
 636     srcroi.width = src.cols;\r
 637     NppiRect dstroi;\r
 638     dstroi.x = dstroi.y = 0;\r
 639     dstroi.height = dst.rows;\r
 640     dstroi.width = dst.cols;\r
 641 \r
 642     cudaStream_t stream = StreamAccessor::getStream(s);\r
 643 \r
 644     NppStreamHandler h(stream);\r
 645 \r
 646     if (src.type() == CV_8UC1)\r
 647     {\r
 648         nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 649             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 650     }\r
 651     else\r
 652     {\r
 653         nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 654             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 655     }\r
 656 \r
 657     if (stream == 0)\r
 658         cudaSafeCall( cudaDeviceSynchronize() );\r
 659 }\r
 660 \r
 661 ////////////////////////////////////////////////////////////////////////\r
 662 // integral\r
 663 \r
 664 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)\r
 665 {\r
 666     GpuMat buffer;\r
 667     integralBuffered(src, sum, buffer, s);\r
 668 }\r
 669 \r
 670 void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)\r
 671 {\r
 672     CV_Assert(src.type() == CV_8UC1);\r
 673 \r
 674     sum.create(src.rows + 1, src.cols + 1, CV_32S);\r
 675     \r
 676     NcvSize32u roiSize;\r
 677     roiSize.width = src.cols;\r
 678     roiSize.height = src.rows;\r
 679 \r
 680         cudaDeviceProp prop;\r
 681         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 682 \r
 683     Ncv32u bufSize;\r
 684     nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
 685     ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);\r
 686 \r
 687     cudaStream_t stream = StreamAccessor::getStream(s);\r
 688 \r
 689     NppStStreamHandler h(stream);\r
 690 \r
 691     nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step), \r
 692         sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );\r
 693 \r
 694     if (stream == 0)\r
 695         cudaSafeCall( cudaDeviceSynchronize() );\r
 696 }\r
 697 \r
 698 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)\r
 699 {\r
 700     CV_Assert(src.type() == CV_8UC1);\r
 701 \r
 702     int width = src.cols + 1, height = src.rows + 1;\r
 703 \r
 704     sum.create(height, width, CV_32S);\r
 705     sqsum.create(height, width, CV_32F);\r
 706 \r
 707     NppiSize sz;\r
 708     sz.width = src.cols;\r
 709     sz.height = src.rows;\r
 710 \r
 711     cudaStream_t stream = StreamAccessor::getStream(s);\r
 712 \r
 713     NppStreamHandler h(stream);\r
 714 \r
 715     nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step), \r
 716         sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );\r
 717 \r
 718     if (stream == 0)\r
 719         cudaSafeCall( cudaDeviceSynchronize() );\r
 720 }\r
 721 \r
 722 //////////////////////////////////////////////////////////////////////////////\r
 723 // sqrIntegral\r
 724 \r
 725 void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)\r
 726 {\r
 727     CV_Assert(src.type() == CV_8U);\r
 728 \r
 729     NcvSize32u roiSize;\r
 730     roiSize.width = src.cols;\r
 731     roiSize.height = src.rows;\r
 732 \r
 733         cudaDeviceProp prop;\r
 734         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 735 \r
 736     Ncv32u bufSize;\r
 737     nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));       \r
 738     GpuMat buf(1, bufSize, CV_8U);\r
 739 \r
 740     cudaStream_t stream = StreamAccessor::getStream(s);\r
 741 \r
 742     NppStStreamHandler h(stream);\r
 743 \r
 744     sqsum.create(src.rows + 1, src.cols + 1, CV_64F);\r
 745     nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step), \r
 746             sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));\r
 747 \r
 748     if (stream == 0)\r
 749         cudaSafeCall( cudaDeviceSynchronize() );\r
 750 }\r
 751 \r
 752 //////////////////////////////////////////////////////////////////////////////\r
 753 // columnSum\r
 754 \r
 755 namespace cv { namespace gpu { namespace imgproc\r
 756 {\r
 757     void columnSum_32F(const DevMem2D src, const DevMem2D dst);\r
 758 }}}\r
 759 \r
 760 void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)\r
 761 {\r
 762     CV_Assert(src.type() == CV_32F);\r
 763 \r
 764     dst.create(src.size(), CV_32F);\r
 765     imgproc::columnSum_32F(src, dst);\r
 766 }\r
 767 \r
 768 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)\r
 769 {\r
 770     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);\r
 771 \r
 772     dst.create(src.size(), CV_32FC1);\r
 773 \r
 774     NppiSize sz;\r
 775     sz.width = src.cols;\r
 776     sz.height = src.rows;\r
 777 \r
 778     NppiRect nppRect;\r
 779     nppRect.height = rect.height;\r
 780     nppRect.width = rect.width;\r
 781     nppRect.x = rect.x;\r
 782     nppRect.y = rect.y;\r
 783 \r
 784     cudaStream_t stream = StreamAccessor::getStream(s);\r
 785 \r
 786     NppStreamHandler h(stream);\r
 787 \r
 788     nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),\r
 789                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );\r
 790 \r
 791     if (stream == 0)\r
 792         cudaSafeCall( cudaDeviceSynchronize() );\r
 793 }\r
 794 \r
 795 \r
 796 ////////////////////////////////////////////////////////////////////////\r
 797 // Histogram\r
 798 \r
 799 namespace\r
 800 {\r
 801     template<int n> struct NPPTypeTraits;\r
 802     template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };\r
 803     template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
 804     template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
 805     template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
 806 \r
 807     typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);\r
 808     typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);\r
 809 \r
 810     template<int SDEPTH> struct NppHistogramEvenFuncC1\r
 811     {\r
 812         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 813 \r
 814         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,\r
 815                     int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);\r
 816     };\r
 817     template<int SDEPTH> struct NppHistogramEvenFuncC4\r
 818     {\r
 819         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 820 \r
 821         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,\r
 822             Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);\r
 823     };\r
 824 \r
 825     template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 826     struct NppHistogramEvenC1\r
 827     {\r
 828         typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;\r
 829 \r
 830         static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)\r
 831         {\r
 832             int levels = histSize + 1;\r
 833             hist.create(1, histSize, CV_32S);\r
 834 \r
 835             NppiSize sz;\r
 836             sz.width = src.cols;\r
 837             sz.height = src.rows;\r
 838 \r
 839             int buf_size;\r
 840             get_buf_size(sz, levels, &buf_size);\r
 841 \r
 842             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 843 \r
 844             NppStreamHandler h(stream);\r
 845 \r
 846             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,\r
 847                 lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 848 \r
 849             if (stream == 0)\r
 850                 cudaSafeCall( cudaDeviceSynchronize() );\r
 851         }\r
 852     };\r
 853     template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 854     struct NppHistogramEvenC4\r
 855     {\r
 856         typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;\r
 857 \r
 858         static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)\r
 859         {\r
 860             int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};\r
 861             hist[0].create(1, histSize[0], CV_32S);\r
 862             hist[1].create(1, histSize[1], CV_32S);\r
 863             hist[2].create(1, histSize[2], CV_32S);\r
 864             hist[3].create(1, histSize[3], CV_32S);\r
 865 \r
 866             NppiSize sz;\r
 867             sz.width = src.cols;\r
 868             sz.height = src.rows;\r
 869 \r
 870             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 871 \r
 872             int buf_size;\r
 873             get_buf_size(sz, levels, &buf_size);\r
 874 \r
 875             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 876 \r
 877             NppStreamHandler h(stream);\r
 878 \r
 879             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 880 \r
 881             if (stream == 0)\r
 882                 cudaSafeCall( cudaDeviceSynchronize() );\r
 883         }\r
 884     };\r
 885 \r
 886     template<int SDEPTH> struct NppHistogramRangeFuncC1\r
 887     {\r
 888         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 889         typedef Npp32s level_t;\r
 890         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 891 \r
 892         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 893             const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);\r
 894     };\r
 895     template<> struct NppHistogramRangeFuncC1<CV_32F>\r
 896     {\r
 897         typedef Npp32f src_t;\r
 898         typedef Npp32f level_t;\r
 899         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 900 \r
 901         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 902             const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);\r
 903     };\r
 904     template<int SDEPTH> struct NppHistogramRangeFuncC4\r
 905     {\r
 906         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 907         typedef Npp32s level_t;\r
 908         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 909 \r
 910         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 911             const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 912     };\r
 913     template<> struct NppHistogramRangeFuncC4<CV_32F>\r
 914     {\r
 915         typedef Npp32f src_t;\r
 916         typedef Npp32f level_t;\r
 917         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 918 \r
 919         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 920             const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 921     };\r
 922 \r
 923     template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 924     struct NppHistogramRangeC1\r
 925     {\r
 926         typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;\r
 927         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 928         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 929 \r
 930         static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream)\r
 931         {\r
 932             CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);\r
 933 \r
 934             hist.create(1, levels.cols - 1, CV_32S);\r
 935 \r
 936             NppiSize sz;\r
 937             sz.width = src.cols;\r
 938             sz.height = src.rows;\r
 939 \r
 940             int buf_size;\r
 941             get_buf_size(sz, levels.cols, &buf_size);\r
 942             \r
 943             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 944 \r
 945             NppStreamHandler h(stream);\r
 946 \r
 947             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );\r
 948 \r
 949             if (stream == 0)\r
 950                 cudaSafeCall( cudaDeviceSynchronize() );\r
 951         }\r
 952     };\r
 953     template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 954     struct NppHistogramRangeC4\r
 955     {\r
 956         typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;\r
 957         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
 958         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
 959 \r
 960         static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream)\r
 961         {\r
 962             CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);\r
 963             CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);\r
 964             CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1);\r
 965             CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1);\r
 966 \r
 967             hist[0].create(1, levels[0].cols - 1, CV_32S);\r
 968             hist[1].create(1, levels[1].cols - 1, CV_32S);\r
 969             hist[2].create(1, levels[2].cols - 1, CV_32S);\r
 970             hist[3].create(1, levels[3].cols - 1, CV_32S);\r
 971 \r
 972             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 973             int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};\r
 974             const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};\r
 975 \r
 976             NppiSize sz;\r
 977             sz.width = src.cols;\r
 978             sz.height = src.rows;\r
 979 \r
 980             int buf_size;\r
 981             get_buf_size(sz, nLevels, &buf_size);\r
 982 \r
 983             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 984 \r
 985             NppStreamHandler h(stream);\r
 986 \r
 987             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );\r
 988 \r
 989             if (stream == 0)\r
 990                 cudaSafeCall( cudaDeviceSynchronize() );\r
 991         }\r
 992     };\r
 993 }\r
 994 \r
 995 void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)\r
 996 {\r
 997     Mat host_levels(1, nLevels, CV_32SC1);\r
 998     nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );\r
 999     levels.upload(host_levels);\r
1000 }\r
1001 \r
1002 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
1003 {\r
1004     GpuMat buf;\r
1005     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
1006 }\r
1007 \r
1008 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
1009 {\r
1010     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );\r
1011 \r
1012     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);\r
1013     static const hist_t hist_callers[] =\r
1014     {\r
1015         NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,\r
1016         0,\r
1017         NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,\r
1018         NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist\r
1019     };\r
1020 \r
1021     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1022 }\r
1023 \r
1024 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1025 {\r
1026     GpuMat buf;\r
1027     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
1028 }\r
1029 \r
1030 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1031 {\r
1032     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );\r
1033 \r
1034     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);\r
1035     static const hist_t hist_callers[] =\r
1036     {\r
1037         NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,\r
1038         0,\r
1039         NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,\r
1040         NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist\r
1041     };\r
1042 \r
1043     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1044 }\r
1045 \r
1046 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)\r
1047 {\r
1048     GpuMat buf;\r
1049     histRange(src, hist, levels, buf, stream);\r
1050 }\r
1051 \r
1052 \r
1053 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)\r
1054 {\r
1055     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);\r
1056 \r
1057     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream);\r
1058     static const hist_t hist_callers[] =\r
1059     {\r
1060         NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,\r
1061         0,\r
1062         NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,\r
1063         NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,\r
1064         0,\r
1065         NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist\r
1066     };\r
1067 \r
1068     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1069 }\r
1070 \r
1071 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)\r
1072 {\r
1073     GpuMat buf;\r
1074     histRange(src, hist, levels, buf, stream);\r
1075 }\r
1076 \r
1077 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream)\r
1078 {\r
1079     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);\r
1080 \r
1081     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream);\r
1082     static const hist_t hist_callers[] =\r
1083     {\r
1084         NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,\r
1085         0,\r
1086         NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,\r
1087         NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,\r
1088         0,\r
1089         NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist\r
1090     };\r
1091 \r
1092     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1093 }\r
1094 \r
1095 namespace cv { namespace gpu { namespace histograms\r
1096 {\r
1097     void histogram256_gpu(DevMem2D src, int* hist, unsigned int* buf, cudaStream_t stream);\r
1098 \r
1099     const int PARTIAL_HISTOGRAM256_COUNT = 240;\r
1100     const int HISTOGRAM256_BIN_COUNT     = 256;\r
1101 }}}\r
1102 \r
1103 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)\r
1104 {\r
1105     GpuMat buf;\r
1106     calcHist(src, hist, buf, stream);\r
1107 }\r
1108 \r
1109 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)\r
1110 {\r
1111     using namespace cv::gpu::histograms;\r
1112 \r
1113     CV_Assert(src.type() == CV_8UC1);\r
1114 \r
1115     hist.create(1, 256, CV_32SC1);\r
1116 \r
1117     ensureSizeIsEnough(1, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT, CV_32SC1, buf);\r
1118 \r
1119     histogram256_gpu(src, hist.ptr<int>(), buf.ptr<unsigned int>(), StreamAccessor::getStream(stream));\r
1120 }\r
1121 \r
1122 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1123 {\r
1124     GpuMat hist;\r
1125     GpuMat buf;\r
1126     equalizeHist(src, dst, hist, buf, stream);\r
1127 }\r
1128 \r
1129 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream)\r
1130 {\r
1131     GpuMat buf;\r
1132     equalizeHist(src, dst, hist, buf, stream);\r
1133 }\r
1134 \r
1135 namespace cv { namespace gpu { namespace histograms\r
1136 {\r
1137     void equalizeHist_gpu(DevMem2D src, DevMem2D dst, const int* lut, cudaStream_t stream);\r
1138 }}}\r
1139 \r
1140 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)\r
1141 {\r
1142     using namespace cv::gpu::histograms;\r
1143 \r
1144     CV_Assert(src.type() == CV_8UC1);\r
1145 \r
1146     dst.create(src.size(), src.type());\r
1147 \r
1148     int intBufSize;\r
1149     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );\r
1150 \r
1151     int bufSize = static_cast<int>(std::max(256 * 240 * sizeof(int), intBufSize + 256 * sizeof(int)));\r
1152 \r
1153     ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);\r
1154 \r
1155     GpuMat histBuf(1, 256 * 240, CV_32SC1, buf.ptr());\r
1156     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());\r
1157     GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);\r
1158 \r
1159     calcHist(src, hist, histBuf, s);\r
1160 \r
1161     cudaStream_t stream = StreamAccessor::getStream(s);\r
1162 \r
1163     NppStreamHandler h(stream);\r
1164 \r
1165     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );\r
1166     \r
1167     if (stream == 0)\r
1168         cudaSafeCall( cudaDeviceSynchronize() );\r
1169 \r
1170     equalizeHist_gpu(src, dst, lut.ptr<int>(), stream);\r
1171 }\r
1172 \r
1173 ////////////////////////////////////////////////////////////////////////\r
1174 // cornerHarris & minEgenVal\r
1175 \r
1176 namespace cv { namespace gpu { namespace imgproc {\r
1177 \r
1178     void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);\r
1179     void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1180     void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1181 \r
1182 }}}\r
1183 \r
1184 namespace \r
1185 {\r
1186     template <typename T>\r
1187     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1188     {   \r
1189         double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;\r
1190         if (ksize < 0) \r
1191             scale *= 2.;\r
1192         if (src.depth() == CV_8U)\r
1193             scale *= 255.;\r
1194         scale = 1./scale;\r
1195 \r
1196         Dx.create(src.size(), CV_32F);\r
1197         Dy.create(src.size(), CV_32F);\r
1198 \r
1199         if (ksize > 0)\r
1200         {\r
1201             Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);\r
1202             Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);\r
1203         }\r
1204         else\r
1205         {\r
1206             Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);\r
1207             Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);\r
1208         }\r
1209     }\r
1210 \r
1211     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1212     {\r
1213         switch (src.type())\r
1214         {\r
1215         case CV_8U:\r
1216             extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);\r
1217             break;\r
1218         case CV_32F:\r
1219             extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);\r
1220             break;\r
1221         default:\r
1222             CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");\r
1223         }\r
1224     }\r
1225 \r
1226 } // Anonymous namespace\r
1227 \r
1228 \r
1229 bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)\r
1230 {\r
1231     switch (cpuBorderType)\r
1232     {\r
1233     case cv::BORDER_REFLECT101:\r
1234         gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;\r
1235         return true;\r
1236     case cv::BORDER_REPLICATE:\r
1237         gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
1238         return true;\r
1239     case cv::BORDER_CONSTANT:\r
1240         gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
1241         return true;\r
1242     case cv::BORDER_REFLECT:\r
1243         gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;\r
1244         return true;\r
1245     case cv::BORDER_WRAP:\r
1246         gpuBorderType = cv::gpu::BORDER_WRAP_GPU;\r
1247         return true;\r
1248     default:\r
1249         return false;\r
1250     };\r
1251     return false;\r
1252 }\r
1253 \r
1254 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)\r
1255 {\r
1256     GpuMat Dx, Dy;\r
1257     cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);\r
1258 }\r
1259 \r
1260 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)\r
1261 {\r
1262     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1263               borderType == cv::BORDER_REPLICATE);\r
1264 \r
1265     int gpuBorderType;\r
1266     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1267 \r
1268     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);\r
1269     dst.create(src.size(), CV_32F);\r
1270     imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);\r
1271 }\r
1272 \r
1273 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)\r
1274 {  \r
1275     GpuMat Dx, Dy;\r
1276     cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);\r
1277 }\r
1278 \r
1279 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1280 {  \r
1281     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1282               borderType == cv::BORDER_REPLICATE);\r
1283 \r
1284     int gpuBorderType;\r
1285     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1286 \r
1287     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);    \r
1288     dst.create(src.size(), CV_32F);\r
1289     imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);\r
1290 }\r
1291 \r
1292 //////////////////////////////////////////////////////////////////////////////\r
1293 // mulSpectrums\r
1294 \r
1295 namespace cv { namespace gpu { namespace imgproc \r
1296 {\r
1297     void mulSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1298                       DevMem2D_<cufftComplex> c);\r
1299 \r
1300     void mulSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1301                            DevMem2D_<cufftComplex> c);\r
1302 }}}\r
1303 \r
1304 \r
1305 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, \r
1306                            int flags, bool conjB) \r
1307 {\r
1308     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>, \r
1309                            DevMem2D_<cufftComplex>);\r
1310     static Caller callers[] = { imgproc::mulSpectrums, \r
1311                                 imgproc::mulSpectrums_CONJ };\r
1312 \r
1313     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1314     CV_Assert(a.size() == b.size());\r
1315 \r
1316     c.create(a.size(), CV_32FC2);\r
1317 \r
1318     Caller caller = callers[(int)conjB];\r
1319     caller(a, b, c);\r
1320 }\r
1321 \r
1322 //////////////////////////////////////////////////////////////////////////////\r
1323 // mulAndScaleSpectrums\r
1324 \r
1325 namespace cv { namespace gpu { namespace imgproc \r
1326 {\r
1327     void mulAndScaleSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1328                              float scale, DevMem2D_<cufftComplex> c);\r
1329 \r
1330     void mulAndScaleSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1331                                   float scale, DevMem2D_<cufftComplex> c);\r
1332 }}}\r
1333 \r
1334 \r
1335 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,\r
1336                                   int flags, float scale, bool conjB) \r
1337 {\r
1338     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>,\r
1339                            float scale, DevMem2D_<cufftComplex>);\r
1340     static Caller callers[] = { imgproc::mulAndScaleSpectrums, \r
1341                                 imgproc::mulAndScaleSpectrums_CONJ };\r
1342 \r
1343     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1344     CV_Assert(a.size() == b.size());\r
1345 \r
1346     c.create(a.size(), CV_32FC2);\r
1347 \r
1348     Caller caller = callers[(int)conjB];\r
1349     caller(a, b, scale, c);\r
1350 }\r
1351 \r
1352 //////////////////////////////////////////////////////////////////////////////\r
1353 // dft\r
1354 \r
1355 void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)\r
1356 {\r
1357     CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);\r
1358 \r
1359     // We don't support unpacked output (in the case of real input)\r
1360     CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));\r
1361 \r
1362     bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);\r
1363     int is_row_dft = flags & DFT_ROWS;\r
1364     int is_scaled_dft = flags & DFT_SCALE;\r
1365     int is_inverse = flags & DFT_INVERSE;\r
1366     bool is_complex_input = src.channels() == 2;\r
1367     bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
1368 \r
1369     // We don't support real-to-real transform\r
1370     CV_Assert(is_complex_input || is_complex_output);\r
1371 \r
1372     GpuMat src_data;\r
1373 \r
1374     // Make sure here we work with the continuous input, \r
1375     // as CUFFT can't handle gaps\r
1376     src_data = src;\r
1377     createContinuous(src.rows, src.cols, src.type(), src_data);\r
1378     if (src_data.data != src.data)\r
1379         src.copyTo(src_data);\r
1380 \r
1381     Size dft_size_opt = dft_size;\r
1382     if (is_1d_input && !is_row_dft)\r
1383     {\r
1384         // If the source matrix is single column handle it as single row\r
1385         dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
1386         dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
1387     }\r
1388 \r
1389     cufftType dft_type = CUFFT_R2C;\r
1390     if (is_complex_input) \r
1391         dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
1392 \r
1393     CV_Assert(dft_size_opt.width > 1);\r
1394 \r
1395     cufftHandle plan;\r
1396     if (is_1d_input || is_row_dft)\r
1397         cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
1398     else\r
1399         cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
1400 \r
1401     if (is_complex_input)\r
1402     {\r
1403         if (is_complex_output)\r
1404         {\r
1405             createContinuous(dft_size, CV_32FC2, dst);\r
1406             cufftSafeCall(cufftExecC2C(\r
1407                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),\r
1408                     is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
1409         }\r
1410         else\r
1411         {\r
1412             createContinuous(dft_size, CV_32F, dst);\r
1413             cufftSafeCall(cufftExecC2R(\r
1414                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));\r
1415         }\r
1416     }\r
1417     else\r
1418     {\r
1419         // We could swap dft_size for efficiency. Here we must reflect it\r
1420         if (dft_size == dft_size_opt)\r
1421             createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
1422         else\r
1423             createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
1424 \r
1425         cufftSafeCall(cufftExecR2C(\r
1426                 plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));\r
1427     }\r
1428 \r
1429     cufftSafeCall(cufftDestroy(plan));\r
1430 \r
1431     if (is_scaled_dft)\r
1432         multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
1433 }\r
1434 \r
1435 //////////////////////////////////////////////////////////////////////////////\r
1436 // convolve\r
1437 \r
1438 \r
1439 void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)\r
1440 {\r
1441     result_size = Size(image_size.width - templ_size.width + 1,\r
1442                        image_size.height - templ_size.height + 1);\r
1443     block_size = estimateBlockSize(result_size, templ_size);\r
1444 \r
1445     dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);\r
1446     dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1);\r
1447     createContinuous(dft_size, CV_32F, image_block);\r
1448     createContinuous(dft_size, CV_32F, templ_block);\r
1449     createContinuous(dft_size, CV_32F, result_data);\r
1450 \r
1451     spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
1452     createContinuous(1, spect_len, CV_32FC2, image_spect);\r
1453     createContinuous(1, spect_len, CV_32FC2, templ_spect);\r
1454     createContinuous(1, spect_len, CV_32FC2, result_spect);\r
1455 \r
1456     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);\r
1457     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);\r
1458 }\r
1459 \r
1460 \r
1461 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
1462 {\r
1463     int scale = 40;\r
1464     Size bsize_min(1024, 1024);\r
1465 \r
1466     // Check whether we use Fermi generation or newer GPU\r
1467     if (DeviceInfo().majorVersion() >= 2)\r
1468     {\r
1469         bsize_min.width = 2048;\r
1470         bsize_min.height = 2048;\r
1471     }\r
1472 \r
1473     Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
1474                std::max(templ_size.height * scale, bsize_min.height));\r
1475 \r
1476     bsize.width = std::min(bsize.width, result_size.width);\r
1477     bsize.height = std::min(bsize.height, result_size.height);\r
1478     return bsize;\r
1479 }\r
1480 \r
1481 \r
1482 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1483                        bool ccorr)\r
1484 {\r
1485     ConvolveBuf buf;\r
1486     convolve(image, templ, result, ccorr, buf);\r
1487 }\r
1488 \r
1489 \r
1490 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1491                        bool ccorr, ConvolveBuf& buf)\r
1492 {\r
1493     StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();\r
1494     StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();\r
1495 \r
1496     CV_Assert(image.type() == CV_32F);\r
1497     CV_Assert(templ.type() == CV_32F);\r
1498 \r
1499     buf.create(image.size(), templ.size());\r
1500     result.create(buf.result_size, CV_32F);\r
1501 \r
1502     Size& block_size = buf.block_size;\r
1503     Size& dft_size = buf.dft_size;\r
1504 \r
1505     GpuMat& image_block = buf.image_block;\r
1506     GpuMat& templ_block = buf.templ_block;\r
1507     GpuMat& result_data = buf.result_data;\r
1508 \r
1509     GpuMat& image_spect = buf.image_spect;\r
1510     GpuMat& templ_spect = buf.templ_spect;\r
1511     GpuMat& result_spect = buf.result_spect;\r
1512 \r
1513     cufftHandle planR2C, planC2R;\r
1514     cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
1515     cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
1516 \r
1517     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
1518     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
1519                    templ_block.cols - templ_roi.cols, 0);\r
1520 \r
1521     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
1522                                templ_spect.ptr<cufftComplex>()));\r
1523 \r
1524     // Process all blocks of the result matrix\r
1525     for (int y = 0; y < result.rows; y += block_size.height)\r
1526     {\r
1527         for (int x = 0; x < result.cols; x += block_size.width)\r
1528         {\r
1529             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
1530                                 std::min(y + dft_size.height, image.rows) - y);\r
1531             GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
1532                              image.step);\r
1533             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
1534                            0, image_block.cols - image_roi.cols, 0);\r
1535 \r
1536             cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
1537                                        image_spect.ptr<cufftComplex>()));\r
1538             mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
1539                                  1.f / dft_size.area(), ccorr);\r
1540             cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
1541                                        result_data.ptr<cufftReal>()));\r
1542 \r
1543             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
1544                                  std::min(y + block_size.height, result.rows) - y);\r
1545             GpuMat result_roi(result_roi_size, result.type(), \r
1546                               (void*)(result.ptr<float>(y) + x), result.step);\r
1547             GpuMat result_block(result_roi_size, result_data.type(), \r
1548                                 result_data.ptr(), result_data.step);\r
1549             result_block.copyTo(result_roi);\r
1550         }\r
1551     }\r
1552 \r
1553     cufftSafeCall(cufftDestroy(planR2C));\r
1554     cufftSafeCall(cufftDestroy(planC2R));\r
1555 }\r
1556 \r
1557 \r
1558 ////////////////////////////////////////////////////////////////////\r
1559 // downsample\r
1560 \r
1561 namespace cv { namespace gpu { namespace imgproc\r
1562 {\r
1563     template <typename T, int cn>\r
1564     void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1565 }}}\r
1566 \r
1567 \r
1568 void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1569 {\r
1570     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1571 \r
1572     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1573     static const Caller callers[6][4] =\r
1574         {{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,\r
1575           imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},\r
1576          {0,0,0,0}, {0,0,0,0},\r
1577          {imgproc::downsampleCaller<short,1>, imgproc::downsampleCaller<short,2>,\r
1578           imgproc::downsampleCaller<short,3>, imgproc::downsampleCaller<short,4>},\r
1579          {0,0,0,0},\r
1580          {imgproc::downsampleCaller<float,1>, imgproc::downsampleCaller<float,2>,\r
1581           imgproc::downsampleCaller<float,3>, imgproc::downsampleCaller<float,4>}};\r
1582 \r
1583     Caller caller = callers[src.depth()][src.channels()-1];\r
1584     if (!caller)\r
1585         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1586 \r
1587     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1588     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1589 }\r
1590 \r
1591 \r
1592 //////////////////////////////////////////////////////////////////////////////\r
1593 // upsample\r
1594 \r
1595 namespace cv { namespace gpu { namespace imgproc\r
1596 {\r
1597     template <typename T, int cn>\r
1598     void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
1599 }}}\r
1600 \r
1601 \r
1602 void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1603 {\r
1604     CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
1605 \r
1606     typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
1607     static const Caller callers[6][5] =\r
1608         {{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,\r
1609           imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},\r
1610          {0,0,0,0}, {0,0,0,0},\r
1611          {imgproc::upsampleCaller<short,1>, imgproc::upsampleCaller<short,2>,\r
1612           imgproc::upsampleCaller<short,3>, imgproc::upsampleCaller<short,4>},\r
1613          {0,0,0,0},\r
1614          {imgproc::upsampleCaller<float,1>, imgproc::upsampleCaller<float,2>,\r
1615           imgproc::upsampleCaller<float,3>, imgproc::upsampleCaller<float,4>}};\r
1616 \r
1617     Caller caller = callers[src.depth()][src.channels()-1];\r
1618     if (!caller)\r
1619         CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
1620 \r
1621     dst.create(src.rows*2, src.cols*2, src.type());\r
1622     caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
1623 }\r
1624 \r
1625 \r
1626 //////////////////////////////////////////////////////////////////////////////\r
1627 // pyrDown\r
1628 \r
1629 namespace cv { namespace gpu { namespace imgproc\r
1630 {\r
1631     template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1632 }}}\r
1633 \r
1634 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
1635 {\r
1636     using namespace cv::gpu::imgproc;\r
1637 \r
1638     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1639 \r
1640     static const func_t funcs[6][4] = \r
1641     {\r
1642         {pyrDown_gpu<uchar, 1>, pyrDown_gpu<uchar, 2>, pyrDown_gpu<uchar, 3>, pyrDown_gpu<uchar, 4>},\r
1643         {pyrDown_gpu<schar, 1>, pyrDown_gpu<schar, 2>, pyrDown_gpu<schar, 3>, pyrDown_gpu<schar, 4>},\r
1644         {pyrDown_gpu<ushort, 1>, pyrDown_gpu<ushort, 2>, pyrDown_gpu<ushort, 3>, pyrDown_gpu<ushort, 4>},\r
1645         {pyrDown_gpu<short, 1>, pyrDown_gpu<short, 2>, pyrDown_gpu<short, 3>, pyrDown_gpu<short, 4>},\r
1646         {pyrDown_gpu<int, 1>, pyrDown_gpu<int, 2>, pyrDown_gpu<int, 3>, pyrDown_gpu<int, 4>},\r
1647         {pyrDown_gpu<float, 1>, pyrDown_gpu<float, 2>, pyrDown_gpu<float, 3>, pyrDown_gpu<float, 4>},\r
1648     };\r
1649 \r
1650     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1651 \r
1652     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
1653     int gpuBorderType;\r
1654     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1655 \r
1656     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1657 \r
1658     funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
1659 }\r
1660 \r
1661 \r
1662 //////////////////////////////////////////////////////////////////////////////\r
1663 // pyrUp\r
1664 \r
1665 namespace cv { namespace gpu { namespace imgproc\r
1666 {\r
1667     template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1668 }}}\r
1669 \r
1670 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
1671 {\r
1672     using namespace cv::gpu::imgproc;\r
1673 \r
1674     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1675 \r
1676     static const func_t funcs[6][4] = \r
1677     {\r
1678         {pyrUp_gpu<uchar, 1>, pyrUp_gpu<uchar, 2>, pyrUp_gpu<uchar, 3>, pyrUp_gpu<uchar, 4>},\r
1679         {pyrUp_gpu<schar, 1>, pyrUp_gpu<schar, 2>, pyrUp_gpu<schar, 3>, pyrUp_gpu<schar, 4>},\r
1680         {pyrUp_gpu<ushort, 1>, pyrUp_gpu<ushort, 2>, pyrUp_gpu<ushort, 3>, pyrUp_gpu<ushort, 4>},\r
1681         {pyrUp_gpu<short, 1>, pyrUp_gpu<short, 2>, pyrUp_gpu<short, 3>, pyrUp_gpu<short, 4>},\r
1682         {pyrUp_gpu<int, 1>, pyrUp_gpu<int, 2>, pyrUp_gpu<int, 3>, pyrUp_gpu<int, 4>},\r
1683         {pyrUp_gpu<float, 1>, pyrUp_gpu<float, 2>, pyrUp_gpu<float, 3>, pyrUp_gpu<float, 4>},\r
1684     };\r
1685 \r
1686     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1687 \r
1688     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
1689     int gpuBorderType;\r
1690     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1691 \r
1692     dst.create(src.rows*2, src.cols*2, src.type());\r
1693 \r
1694     funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
1695 }\r
1696 \r
1697 \r
1698 //////////////////////////////////////////////////////////////////////////////\r
1699 // Canny\r
1700 \r
1701 cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)\r
1702 {\r
1703     CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());\r
1704 \r
1705     create(dx_.size(), -1);\r
1706 }\r
1707 \r
1708 void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)\r
1709 {\r
1710     ensureSizeIsEnough(image_size, CV_32SC1, dx);\r
1711     ensureSizeIsEnough(image_size, CV_32SC1, dy);\r
1712 \r
1713     if (apperture_size == 3)\r
1714     {\r
1715         ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);\r
1716         ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);\r
1717     }\r
1718     else if(apperture_size > 0)\r
1719     {\r
1720         if (!filterDX)\r
1721             filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);\r
1722         if (!filterDY)\r
1723             filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);\r
1724     }\r
1725 \r
1726     ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);\r
1727 \r
1728     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);\r
1729     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);\r
1730 }\r
1731 \r
1732 void cv::gpu::CannyBuf::release()\r
1733 {\r
1734     dx.release();\r
1735     dy.release();\r
1736     dx_buf.release();\r
1737     dy_buf.release();\r
1738     edgeBuf.release();\r
1739     trackBuf1.release();\r
1740     trackBuf2.release();\r
1741 }\r
1742 \r
1743 namespace cv { namespace gpu { namespace canny\r
1744 {    \r
1745     void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);\r
1746 \r
1747     void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1748     void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1749 \r
1750     void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);\r
1751     \r
1752     void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);\r
1753 \r
1754     void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);\r
1755 \r
1756     void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);\r
1757 }}}\r
1758 \r
1759 namespace\r
1760 {\r
1761     void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)\r
1762     {\r
1763         using namespace cv::gpu::canny;\r
1764 \r
1765         calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);\r
1766         \r
1767         edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);\r
1768         \r
1769         edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);\r
1770         \r
1771         getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);\r
1772     }\r
1773 }\r
1774 \r
1775 void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1776 {\r
1777     CannyBuf buf(src.size(), apperture_size);\r
1778     Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);\r
1779 }\r
1780 \r
1781 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1782 {\r
1783     using namespace cv::gpu::canny;\r
1784 \r
1785     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
1786     CV_Assert(src.type() == CV_8UC1);\r
1787 \r
1788     if( low_thresh > high_thresh )\r
1789         std::swap( low_thresh, high_thresh);\r
1790 \r
1791     dst.create(src.size(), CV_8U);\r
1792     dst.setTo(Scalar::all(0));\r
1793     \r
1794     buf.create(src.size(), apperture_size);\r
1795     buf.edgeBuf.setTo(Scalar::all(0));\r
1796 \r
1797     if (apperture_size == 3)\r
1798     {\r
1799         calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);\r
1800 \r
1801         calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1802     }\r
1803     else\r
1804     {\r
1805         buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));\r
1806         buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));\r
1807 \r
1808         calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1809     }\r
1810 \r
1811     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1812 }\r
1813 \r
1814 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1815 {\r
1816     CannyBuf buf(dx, dy);\r
1817     Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);\r
1818 }\r
1819 \r
1820 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1821 {\r
1822     using namespace cv::gpu::canny;\r
1823 \r
1824     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
1825     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());\r
1826 \r
1827     if( low_thresh > high_thresh )\r
1828         std::swap( low_thresh, high_thresh);\r
1829 \r
1830     dst.create(dx.size(), CV_8U);\r
1831     dst.setTo(Scalar::all(0));\r
1832     \r
1833     buf.dx = dx; buf.dy = dy;\r
1834     buf.create(dx.size(), -1);\r
1835     buf.edgeBuf.setTo(Scalar::all(0));\r
1836 \r
1837     calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);\r
1838 \r
1839     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1840 }\r
1841 \r
1842 #endif /* !defined (HAVE_CUDA) */\r
1843 \r
1844 \r