modules/gpu/src/imgproc.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "precomp.hpp"\r
  44 \r
  45 using namespace cv;\r
  46 using namespace cv::gpu;\r
  47 \r
  48 #if !defined (HAVE_CUDA)\r
  49 \r
  50 void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&, Stream&){ throw_nogpu(); }\r
  51 void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  52 void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  53 void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  54 void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  55 void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
  56 void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  57 void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  58 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  59 void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  60 void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  61 void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  62 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
  63 void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  64 void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  65 void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  66 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  67 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
  68 void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_nogpu(); }\r
  69 void cv::gpu::evenLevels(GpuMat&, int, int, int) { throw_nogpu(); }\r
  70 void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  71 void cv::gpu::histEven(const GpuMat&, GpuMat&, GpuMat&, int, int, int, Stream&) { throw_nogpu(); }\r
  72 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  73 void cv::gpu::histEven(const GpuMat&, GpuMat*, GpuMat&, int*, int*, int*, Stream&) { throw_nogpu(); }\r
  74 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }\r
  75 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  76 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }\r
  77 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_nogpu(); }\r
  78 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  79 void cv::gpu::calcHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  80 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  81 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  82 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  83 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  84 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }\r
  85 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  86 void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
  87 void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }\r
  88 void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }\r
  89 void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }\r
  90 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
  91 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
  92 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
  93 void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  94 void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  95 void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  96 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
  97 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
  98 void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
  99 cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }\r
 100 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }\r
 101 void cv::gpu::CannyBuf::release() { throw_nogpu(); }\r
 102 \r
 103 #else /* !defined (HAVE_CUDA) */\r
 104 \r
 105 ////////////////////////////////////////////////////////////////////////\r
 106 // remap\r
 107 \r
 108 namespace cv { namespace gpu {  namespace imgproc\r
 109 {\r
 110     template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
 111                                          int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
 112 }}}\r
 113 \r
 114 void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)\r
 115 {\r
 116     using namespace cv::gpu::imgproc;\r
 117 \r
 118     typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
 119     static const caller_t callers[6][4] = \r
 120     {\r
 121         {remap_gpu<uchar>, 0/*remap_gpu<uchar2>*/, remap_gpu<uchar3>, remap_gpu<uchar4>},\r
 122         {0/*remap_gpu<schar>*/, 0/*remap_gpu<char2>*/, 0/*remap_gpu<char3>*/, 0/*remap_gpu<char4>*/},\r
 123         {remap_gpu<ushort>, 0/*remap_gpu<ushort2>*/, remap_gpu<ushort3>, remap_gpu<ushort4>},\r
 124         {remap_gpu<short>, 0/*remap_gpu<short2>*/, remap_gpu<short3>, remap_gpu<short4>},\r
 125         {0/*remap_gpu<int>*/, 0/*remap_gpu<int2>*/, 0/*remap_gpu<int3>*/, 0/*remap_gpu<int4>*/},\r
 126         {remap_gpu<float>, 0/*remap_gpu<float2>*/, remap_gpu<float3>, remap_gpu<float4>}\r
 127     };\r
 128 \r
 129     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
 130     CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());\r
 131 \r
 132     caller_t func = callers[src.depth()][src.channels() - 1];\r
 133     CV_Assert(func != 0);\r
 134 \r
 135     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 136 \r
 137     CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);\r
 138     int gpuBorderType;\r
 139     CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));\r
 140 \r
 141     dst.create(xmap.size(), src.type());\r
 142     \r
 143     Scalar_<float> borderValueFloat;\r
 144     borderValueFloat = borderValue;\r
 145 \r
 146     func(src, xmap, ymap, dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream));\r
 147 }\r
 148 \r
 149 ////////////////////////////////////////////////////////////////////////\r
 150 // meanShiftFiltering_GPU\r
 151 \r
 152 namespace cv { namespace gpu {  namespace imgproc\r
 153 {\r
 154     extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
 155 }}}\r
 156 \r
 157 void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)\r
 158 {\r
 159     if( src.empty() )\r
 160         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 161 \r
 162     if( src.depth() != CV_8U || src.channels() != 4 )\r
 163         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 164 \r
 165     dst.create( src.size(), CV_8UC4 );\r
 166 \r
 167     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 168         criteria.maxCount = 5;\r
 169 \r
 170     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 171 \r
 172     float eps;\r
 173     if( !(criteria.type & TermCriteria::EPS) )\r
 174         eps = 1.f;\r
 175     eps = (float)std::max(criteria.epsilon, 0.0);\r
 176 \r
 177     imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);\r
 178 }\r
 179 \r
 180 ////////////////////////////////////////////////////////////////////////\r
 181 // meanShiftProc_GPU\r
 182 \r
 183 namespace cv { namespace gpu {  namespace imgproc\r
 184 {\r
 185     extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
 186 }}}\r
 187 \r
 188 void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)\r
 189 {\r
 190     if( src.empty() )\r
 191         CV_Error( CV_StsBadArg, "The input image is empty" );\r
 192 \r
 193     if( src.depth() != CV_8U || src.channels() != 4 )\r
 194         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );\r
 195 \r
 196     dstr.create( src.size(), CV_8UC4 );\r
 197     dstsp.create( src.size(), CV_16SC2 );\r
 198 \r
 199     if( !(criteria.type & TermCriteria::MAX_ITER) )\r
 200         criteria.maxCount = 5;\r
 201 \r
 202     int maxIter = std::min(std::max(criteria.maxCount, 1), 100);\r
 203 \r
 204     float eps;\r
 205     if( !(criteria.type & TermCriteria::EPS) )\r
 206         eps = 1.f;\r
 207     eps = (float)std::max(criteria.epsilon, 0.0);\r
 208 \r
 209     imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);\r
 210 }\r
 211 \r
 212 ////////////////////////////////////////////////////////////////////////\r
 213 // drawColorDisp\r
 214 \r
 215 namespace cv { namespace gpu {  namespace imgproc\r
 216 {\r
 217     void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 218     void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
 219 }}}\r
 220 \r
 221 namespace\r
 222 {\r
 223     template <typename T>\r
 224     void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)\r
 225     {\r
 226         dst.create(src.size(), CV_8UC4);\r
 227 \r
 228         imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);\r
 229     }\r
 230 \r
 231     typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);\r
 232 \r
 233     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};\r
 234 }\r
 235 \r
 236 void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream)\r
 237 {\r
 238     CV_Assert(src.type() == CV_8U || src.type() == CV_16S);\r
 239 \r
 240     drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));\r
 241 }\r
 242 \r
 243 ////////////////////////////////////////////////////////////////////////\r
 244 // reprojectImageTo3D\r
 245 \r
 246 namespace cv { namespace gpu {  namespace imgproc\r
 247 {\r
 248     void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 249     void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
 250 }}}\r
 251 \r
 252 namespace\r
 253 {\r
 254     template <typename T>\r
 255     void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)\r
 256     {\r
 257         xyzw.create(disp.rows, disp.cols, CV_32FC4);\r
 258         imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
 259     }\r
 260 \r
 261     typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);\r
 262 \r
 263     const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};\r
 264 }\r
 265 \r
 266 void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, Stream& stream)\r
 267 {\r
 268     CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);\r
 269 \r
 270     reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));\r
 271 }\r
 272 \r
 273 ////////////////////////////////////////////////////////////////////////\r
 274 // resize\r
 275 \r
 276 namespace cv { namespace gpu {  namespace imgproc\r
 277 {\r
 278     template <typename T> void resize_gpu(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
 279 }}}\r
 280 \r
 281 void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
 282 {\r
 283     CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );\r
 284     CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );\r
 285     CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
 286 \r
 287     if( dsize == Size() )\r
 288     {\r
 289         dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));\r
 290     }\r
 291     else\r
 292     {\r
 293         fx = (double)dsize.width / src.cols;\r
 294         fy = (double)dsize.height / src.rows;\r
 295     }\r
 296 \r
 297     dst.create(dsize, src.type());\r
 298 \r
 299     if (dsize == src.size())\r
 300     {\r
 301         if (s)\r
 302             s.enqueueCopy(src, dst);\r
 303         else\r
 304             src.copyTo(dst);\r
 305         return;\r
 306     }\r
 307 \r
 308     cudaStream_t stream = StreamAccessor::getStream(s);\r
 309 \r
 310     if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))\r
 311     {\r
 312         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};\r
 313 \r
 314         NppiSize srcsz;\r
 315         srcsz.width  = src.cols;\r
 316         srcsz.height = src.rows;\r
 317         NppiRect srcrect;\r
 318         srcrect.x = srcrect.y = 0;\r
 319         srcrect.width  = src.cols;\r
 320         srcrect.height = src.rows;\r
 321         NppiSize dstsz;\r
 322         dstsz.width  = dst.cols;\r
 323         dstsz.height = dst.rows;\r
 324 \r
 325         NppStreamHandler h(stream);\r
 326 \r
 327         if (src.type() == CV_8UC1)\r
 328         {\r
 329             nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 330                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 331         }\r
 332         else\r
 333         {\r
 334             nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
 335                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
 336         }\r
 337 \r
 338         if (stream == 0)\r
 339             cudaSafeCall( cudaDeviceSynchronize() );\r
 340     }\r
 341     else\r
 342     {\r
 343         using namespace cv::gpu::imgproc;\r
 344 \r
 345         typedef void (*caller_t)(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
 346         static const caller_t callers[6][4] = \r
 347         {\r
 348             {resize_gpu<uchar>, 0/*resize_gpu<uchar2>*/, resize_gpu<uchar3>, resize_gpu<uchar4>},\r
 349             {0/*resize_gpu<schar>*/, 0/*resize_gpu<char2>*/, 0/*resize_gpu<char3>*/, 0/*resize_gpu<char4>*/},\r
 350             {resize_gpu<ushort>, 0/*resize_gpu<ushort2>*/, resize_gpu<ushort3>, resize_gpu<ushort4>},\r
 351             {resize_gpu<short>, 0/*resize_gpu<short2>*/, resize_gpu<short3>, resize_gpu<short4>},\r
 352             {0/*resize_gpu<int>*/, 0/*resize_gpu<int2>*/, 0/*resize_gpu<int3>*/, 0/*resize_gpu<int4>*/},\r
 353             {resize_gpu<float>, 0/*resize_gpu<float2>*/, resize_gpu<float3>, resize_gpu<float4>}\r
 354         };\r
 355 \r
 356         callers[src.depth()][src.channels() - 1](src, static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);\r
 357     }\r
 358 }\r
 359 \r
 360 ////////////////////////////////////////////////////////////////////////\r
 361 // copyMakeBorder\r
 362 \r
 363 namespace cv { namespace gpu {  namespace imgproc\r
 364 {\r
 365     template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);\r
 366 }}}\r
 367 \r
 368 namespace\r
 369 {\r
 370     template <typename T, int cn> void copyMakeBorder_caller(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)\r
 371     {\r
 372         Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));\r
 373 \r
 374         imgproc::copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);\r
 375     }\r
 376 }\r
 377 \r
 378 void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)\r
 379 {\r
 380     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
 381     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
 382 \r
 383     dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
 384 \r
 385     cudaStream_t stream = StreamAccessor::getStream(s);\r
 386 \r
 387     if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))\r
 388     {\r
 389         NppiSize srcsz;\r
 390         srcsz.width  = src.cols;\r
 391         srcsz.height = src.rows;\r
 392 \r
 393         NppiSize dstsz;\r
 394         dstsz.width  = dst.cols;\r
 395         dstsz.height = dst.rows;\r
 396 \r
 397         NppStreamHandler h(stream);\r
 398 \r
 399         switch (src.type())\r
 400         {\r
 401         case CV_8UC1:\r
 402             {\r
 403                 Npp8u nVal = saturate_cast<Npp8u>(value[0]);\r
 404                 nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 405                     dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 406                 break;\r
 407             }\r
 408         case CV_8UC4:\r
 409             {\r
 410                 Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};\r
 411                 nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
 412                     dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 413                 break;\r
 414             }\r
 415         case CV_32SC1:\r
 416             {\r
 417                 Npp32s nVal = saturate_cast<Npp32s>(value[0]);\r
 418                 nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 419                     dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 420                 break;\r
 421             }\r
 422         case CV_32FC1:\r
 423             {\r
 424                 Npp32f val = saturate_cast<Npp32f>(value[0]);\r
 425                 Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
 426                 nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
 427                     dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
 428                 break;\r
 429             }\r
 430         }\r
 431 \r
 432         if (stream == 0)\r
 433             cudaSafeCall( cudaDeviceSynchronize() );\r
 434     }\r
 435     else\r
 436     {\r
 437         typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);\r
 438         static const caller_t callers[6][4] = \r
 439         {\r
 440             {   copyMakeBorder_caller<uchar, 1>  , 0/*copyMakeBorder_caller<uchar, 2>*/ ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},\r
 441             {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},\r
 442             {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},\r
 443             {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},\r
 444             {0/*copyMakeBorder_caller<int, 1>*/  , 0/*copyMakeBorder_caller<int, 2>*/   , 0/*copyMakeBorder_caller<int, 3>*/  , 0/*copyMakeBorder_caller<int, 4>*/},\r
 445             {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}\r
 446         };\r
 447 \r
 448         caller_t func = callers[src.depth()][src.channels() - 1];\r
 449         CV_Assert(func != 0);\r
 450 \r
 451         int gpuBorderType;\r
 452         CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
 453 \r
 454         func(src, dst, top, left, gpuBorderType, value, stream);\r
 455     }\r
 456 }\r
 457 \r
 458 ////////////////////////////////////////////////////////////////////////\r
 459 // warp\r
 460 \r
 461 namespace\r
 462 {\r
 463     typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,\r
 464                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 465                                        int interpolation);\r
 466     typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,\r
 467                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 468                                        int interpolation);\r
 469     typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,\r
 470                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 471                                        int interpolation);\r
 472     typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,\r
 473                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],\r
 474                                        int interpolation);\r
 475 \r
 476     void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,\r
 477                        npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],\r
 478                        npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2], cudaStream_t stream)\r
 479     {\r
 480         static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 481 \r
 482         int interpolation = flags & INTER_MAX;\r
 483 \r
 484         CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);\r
 485         CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 486 \r
 487         dst.create(dsize, src.type());\r
 488 \r
 489         NppiSize srcsz;\r
 490         srcsz.height = src.rows;\r
 491         srcsz.width = src.cols;\r
 492         NppiRect srcroi;\r
 493         srcroi.x = srcroi.y = 0;\r
 494         srcroi.height = src.rows;\r
 495         srcroi.width = src.cols;\r
 496         NppiRect dstroi;\r
 497         dstroi.x = dstroi.y = 0;\r
 498         dstroi.height = dst.rows;\r
 499         dstroi.width = dst.cols;\r
 500 \r
 501         int warpInd = (flags & WARP_INVERSE_MAP) >> 4;\r
 502 \r
 503         NppStreamHandler h(stream);\r
 504 \r
 505         switch (src.depth())\r
 506         {\r
 507         case CV_8U:\r
 508             nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 509                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 510             break;\r
 511         case CV_16U:\r
 512             nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 513                 dst.ptr<Npp16u>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 514             break;\r
 515         case CV_32S:\r
 516             nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, static_cast<int>(src.step), srcroi,\r
 517                 dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 518             break;\r
 519         case CV_32F:\r
 520             nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, static_cast<int>(src.step), srcroi,\r
 521                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]) );\r
 522             break;\r
 523         default:\r
 524             CV_Assert(!"Unsupported source type");\r
 525         }\r
 526 \r
 527         if (stream == 0)\r
 528             cudaSafeCall( cudaDeviceSynchronize() );\r
 529     }\r
 530 }\r
 531 \r
 532 void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 533 {\r
 534     static npp_warp_8u_t npp_warpAffine_8u[][2] =\r
 535         {\r
 536             {0, 0},\r
 537             {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},\r
 538             {0, 0},\r
 539             {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},\r
 540             {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}\r
 541         };\r
 542     static npp_warp_16u_t npp_warpAffine_16u[][2] =\r
 543         {\r
 544             {0, 0},\r
 545             {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},\r
 546             {0, 0},\r
 547             {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},\r
 548             {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}\r
 549         };\r
 550     static npp_warp_32s_t npp_warpAffine_32s[][2] =\r
 551         {\r
 552             {0, 0},\r
 553             {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},\r
 554             {0, 0},\r
 555             {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},\r
 556             {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}\r
 557         };\r
 558     static npp_warp_32f_t npp_warpAffine_32f[][2] =\r
 559         {\r
 560             {0, 0},\r
 561             {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},\r
 562             {0, 0},\r
 563             {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},\r
 564             {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}\r
 565         };\r
 566 \r
 567     CV_Assert(M.rows == 2 && M.cols == 3);\r
 568 \r
 569     double coeffs[2][3];\r
 570     Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);\r
 571     M.convertTo(coeffsMat, coeffsMat.type());\r
 572 \r
 573     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpAffine_8u, npp_warpAffine_16u, npp_warpAffine_32s, npp_warpAffine_32f, StreamAccessor::getStream(s));\r
 574 }\r
 575 \r
 576 void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, Stream& s)\r
 577 {\r
 578     static npp_warp_8u_t npp_warpPerspective_8u[][2] =\r
 579         {\r
 580             {0, 0},\r
 581             {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},\r
 582             {0, 0},\r
 583             {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},\r
 584             {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}\r
 585         };\r
 586     static npp_warp_16u_t npp_warpPerspective_16u[][2] =\r
 587         {\r
 588             {0, 0},\r
 589             {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},\r
 590             {0, 0},\r
 591             {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},\r
 592             {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}\r
 593         };\r
 594     static npp_warp_32s_t npp_warpPerspective_32s[][2] =\r
 595         {\r
 596             {0, 0},\r
 597             {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},\r
 598             {0, 0},\r
 599             {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},\r
 600             {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}\r
 601         };\r
 602     static npp_warp_32f_t npp_warpPerspective_32f[][2] =\r
 603         {\r
 604             {0, 0},\r
 605             {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},\r
 606             {0, 0},\r
 607             {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},\r
 608             {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}\r
 609         };\r
 610 \r
 611     CV_Assert(M.rows == 3 && M.cols == 3);\r
 612 \r
 613     double coeffs[3][3];\r
 614     Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);\r
 615     M.convertTo(coeffsMat, coeffsMat.type());\r
 616 \r
 617     nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));\r
 618 }\r
 619 \r
 620 //////////////////////////////////////////////////////////////////////////////\r
 621 // buildWarpPlaneMaps\r
 622 \r
 623 namespace cv { namespace gpu { namespace imgproc\r
 624 {\r
 625     void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 626                             const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,\r
 627                             cudaStream_t stream);\r
 628 }}}\r
 629 \r
 630 void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, \r
 631                                  float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 632 {\r
 633     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
 634     CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
 635     CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());\r
 636 \r
 637     Mat K_Rinv = K * R.t();\r
 638     Mat R_Kinv = R * K.inv();\r
 639     CV_Assert(K_Rinv.isContinuous());\r
 640     CV_Assert(R_Kinv.isContinuous());\r
 641 \r
 642     map_x.create(dst_roi.size(), CV_32F);\r
 643     map_y.create(dst_roi.size(), CV_32F);\r
 644     imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), \r
 645                                 T.ptr<float>(), scale, StreamAccessor::getStream(stream));\r
 646 }\r
 647 \r
 648 //////////////////////////////////////////////////////////////////////////////\r
 649 // buildWarpCylyndricalMaps\r
 650 \r
 651 namespace cv { namespace gpu { namespace imgproc\r
 652 {\r
 653     void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 654                                   const float k_rinv[9], const float r_kinv[9], float scale,\r
 655                                   cudaStream_t stream);\r
 656 }}}\r
 657 \r
 658 void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,\r
 659                                        GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 660 {\r
 661     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
 662     CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
 663 \r
 664     Mat K_Rinv = K * R.t();\r
 665     Mat R_Kinv = R * K.inv();\r
 666     CV_Assert(K_Rinv.isContinuous());\r
 667     CV_Assert(R_Kinv.isContinuous());\r
 668 \r
 669     map_x.create(dst_roi.size(), CV_32F);\r
 670     map_y.create(dst_roi.size(), CV_32F);\r
 671     imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),\r
 672                                       scale, StreamAccessor::getStream(stream));\r
 673 }\r
 674 \r
 675 \r
 676 //////////////////////////////////////////////////////////////////////////////\r
 677 // buildWarpSphericalMaps\r
 678 \r
 679 namespace cv { namespace gpu { namespace imgproc\r
 680 {\r
 681     void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
 682                                 const float k_rinv[9], const float r_kinv[9], float scale,\r
 683                                 cudaStream_t stream);\r
 684 }}}\r
 685 \r
 686 void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,\r
 687                                      GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
 688 {\r
 689     CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
 690     CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
 691 \r
 692     Mat K_Rinv = K * R.t();\r
 693     Mat R_Kinv = R * K.inv();\r
 694     CV_Assert(K_Rinv.isContinuous());\r
 695     CV_Assert(R_Kinv.isContinuous());\r
 696 \r
 697     map_x.create(dst_roi.size(), CV_32F);\r
 698     map_y.create(dst_roi.size(), CV_32F);\r
 699     imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),\r
 700                                     scale, StreamAccessor::getStream(stream));\r
 701 }\r
 702 \r
 703 ////////////////////////////////////////////////////////////////////////\r
 704 // rotate\r
 705 \r
 706 void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& s)\r
 707 {\r
 708     static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};\r
 709 \r
 710     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
 711     CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
 712 \r
 713     dst.create(dsize, src.type());\r
 714 \r
 715     NppiSize srcsz;\r
 716     srcsz.height = src.rows;\r
 717     srcsz.width = src.cols;\r
 718     NppiRect srcroi;\r
 719     srcroi.x = srcroi.y = 0;\r
 720     srcroi.height = src.rows;\r
 721     srcroi.width = src.cols;\r
 722     NppiRect dstroi;\r
 723     dstroi.x = dstroi.y = 0;\r
 724     dstroi.height = dst.rows;\r
 725     dstroi.width = dst.cols;\r
 726 \r
 727     cudaStream_t stream = StreamAccessor::getStream(s);\r
 728 \r
 729     NppStreamHandler h(stream);\r
 730 \r
 731     if (src.type() == CV_8UC1)\r
 732     {\r
 733         nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 734             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 735     }\r
 736     else\r
 737     {\r
 738         nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcroi,\r
 739             dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );\r
 740     }\r
 741 \r
 742     if (stream == 0)\r
 743         cudaSafeCall( cudaDeviceSynchronize() );\r
 744 }\r
 745 \r
 746 ////////////////////////////////////////////////////////////////////////\r
 747 // integral\r
 748 \r
 749 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s)\r
 750 {\r
 751     GpuMat buffer;\r
 752     integralBuffered(src, sum, buffer, s);\r
 753 }\r
 754 \r
 755 void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s)\r
 756 {\r
 757     CV_Assert(src.type() == CV_8UC1);\r
 758 \r
 759     sum.create(src.rows + 1, src.cols + 1, CV_32S);\r
 760     \r
 761     NcvSize32u roiSize;\r
 762     roiSize.width = src.cols;\r
 763     roiSize.height = src.rows;\r
 764 \r
 765         cudaDeviceProp prop;\r
 766         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 767 \r
 768     Ncv32u bufSize;\r
 769     nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
 770     ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);\r
 771 \r
 772     cudaStream_t stream = StreamAccessor::getStream(s);\r
 773 \r
 774     NppStStreamHandler h(stream);\r
 775 \r
 776     nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step), \r
 777         sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );\r
 778 \r
 779     if (stream == 0)\r
 780         cudaSafeCall( cudaDeviceSynchronize() );\r
 781 }\r
 782 \r
 783 void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum, Stream& s)\r
 784 {\r
 785     CV_Assert(src.type() == CV_8UC1);\r
 786 \r
 787     int width = src.cols + 1, height = src.rows + 1;\r
 788 \r
 789     sum.create(height, width, CV_32S);\r
 790     sqsum.create(height, width, CV_32F);\r
 791 \r
 792     NppiSize sz;\r
 793     sz.width = src.cols;\r
 794     sz.height = src.rows;\r
 795 \r
 796     cudaStream_t stream = StreamAccessor::getStream(s);\r
 797 \r
 798     NppStreamHandler h(stream);\r
 799 \r
 800     nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(const_cast<Npp8u*>(src.ptr<Npp8u>()), static_cast<int>(src.step), \r
 801         sum.ptr<Npp32s>(), static_cast<int>(sum.step), sqsum.ptr<Npp32f>(), static_cast<int>(sqsum.step), sz, 0, 0.0f, height) );\r
 802 \r
 803     if (stream == 0)\r
 804         cudaSafeCall( cudaDeviceSynchronize() );\r
 805 }\r
 806 \r
 807 //////////////////////////////////////////////////////////////////////////////\r
 808 // sqrIntegral\r
 809 \r
 810 void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)\r
 811 {\r
 812     CV_Assert(src.type() == CV_8U);\r
 813 \r
 814     NcvSize32u roiSize;\r
 815     roiSize.width = src.cols;\r
 816     roiSize.height = src.rows;\r
 817 \r
 818         cudaDeviceProp prop;\r
 819         cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
 820 \r
 821     Ncv32u bufSize;\r
 822     nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));       \r
 823     GpuMat buf(1, bufSize, CV_8U);\r
 824 \r
 825     cudaStream_t stream = StreamAccessor::getStream(s);\r
 826 \r
 827     NppStStreamHandler h(stream);\r
 828 \r
 829     sqsum.create(src.rows + 1, src.cols + 1, CV_64F);\r
 830     nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step), \r
 831             sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));\r
 832 \r
 833     if (stream == 0)\r
 834         cudaSafeCall( cudaDeviceSynchronize() );\r
 835 }\r
 836 \r
 837 //////////////////////////////////////////////////////////////////////////////\r
 838 // columnSum\r
 839 \r
 840 namespace cv { namespace gpu { namespace imgproc\r
 841 {\r
 842     void columnSum_32F(const DevMem2D src, const DevMem2D dst);\r
 843 }}}\r
 844 \r
 845 void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)\r
 846 {\r
 847     CV_Assert(src.type() == CV_32F);\r
 848 \r
 849     dst.create(src.size(), CV_32F);\r
 850     imgproc::columnSum_32F(src, dst);\r
 851 }\r
 852 \r
 853 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)\r
 854 {\r
 855     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);\r
 856 \r
 857     dst.create(src.size(), CV_32FC1);\r
 858 \r
 859     NppiSize sz;\r
 860     sz.width = src.cols;\r
 861     sz.height = src.rows;\r
 862 \r
 863     NppiRect nppRect;\r
 864     nppRect.height = rect.height;\r
 865     nppRect.width = rect.width;\r
 866     nppRect.x = rect.x;\r
 867     nppRect.y = rect.y;\r
 868 \r
 869     cudaStream_t stream = StreamAccessor::getStream(s);\r
 870 \r
 871     NppStreamHandler h(stream);\r
 872 \r
 873     nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp32f>(), static_cast<int>(sqr.step),\r
 874                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );\r
 875 \r
 876     if (stream == 0)\r
 877         cudaSafeCall( cudaDeviceSynchronize() );\r
 878 }\r
 879 \r
 880 \r
 881 ////////////////////////////////////////////////////////////////////////\r
 882 // Histogram\r
 883 \r
 884 namespace\r
 885 {\r
 886     template<int n> struct NPPTypeTraits;\r
 887     template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };\r
 888     template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
 889     template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
 890     template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
 891 \r
 892     typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);\r
 893     typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);\r
 894 \r
 895     template<int SDEPTH> struct NppHistogramEvenFuncC1\r
 896     {\r
 897         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 898 \r
 899         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,\r
 900                     int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);\r
 901     };\r
 902     template<int SDEPTH> struct NppHistogramEvenFuncC4\r
 903     {\r
 904         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 905 \r
 906         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,\r
 907             Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);\r
 908     };\r
 909 \r
 910     template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
 911     struct NppHistogramEvenC1\r
 912     {\r
 913         typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;\r
 914 \r
 915         static void hist(const GpuMat& src, GpuMat& hist, GpuMat& buffer, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)\r
 916         {\r
 917             int levels = histSize + 1;\r
 918             hist.create(1, histSize, CV_32S);\r
 919 \r
 920             NppiSize sz;\r
 921             sz.width = src.cols;\r
 922             sz.height = src.rows;\r
 923 \r
 924             int buf_size;\r
 925             get_buf_size(sz, levels, &buf_size);\r
 926 \r
 927             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 928 \r
 929             NppStreamHandler h(stream);\r
 930 \r
 931             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,\r
 932                 lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 933 \r
 934             if (stream == 0)\r
 935                 cudaSafeCall( cudaDeviceSynchronize() );\r
 936         }\r
 937     };\r
 938     template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
 939     struct NppHistogramEvenC4\r
 940     {\r
 941         typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;\r
 942 \r
 943         static void hist(const GpuMat& src, GpuMat hist[4], GpuMat& buffer, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)\r
 944         {\r
 945             int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};\r
 946             hist[0].create(1, histSize[0], CV_32S);\r
 947             hist[1].create(1, histSize[1], CV_32S);\r
 948             hist[2].create(1, histSize[2], CV_32S);\r
 949             hist[3].create(1, histSize[3], CV_32S);\r
 950 \r
 951             NppiSize sz;\r
 952             sz.width = src.cols;\r
 953             sz.height = src.rows;\r
 954 \r
 955             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
 956 \r
 957             int buf_size;\r
 958             get_buf_size(sz, levels, &buf_size);\r
 959 \r
 960             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
 961 \r
 962             NppStreamHandler h(stream);\r
 963 \r
 964             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );\r
 965 \r
 966             if (stream == 0)\r
 967                 cudaSafeCall( cudaDeviceSynchronize() );\r
 968         }\r
 969     };\r
 970 \r
 971     template<int SDEPTH> struct NppHistogramRangeFuncC1\r
 972     {\r
 973         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 974         typedef Npp32s level_t;\r
 975         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 976 \r
 977         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 978             const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);\r
 979     };\r
 980     template<> struct NppHistogramRangeFuncC1<CV_32F>\r
 981     {\r
 982         typedef Npp32f src_t;\r
 983         typedef Npp32f level_t;\r
 984         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
 985 \r
 986         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,\r
 987             const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);\r
 988     };\r
 989     template<int SDEPTH> struct NppHistogramRangeFuncC4\r
 990     {\r
 991         typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
 992         typedef Npp32s level_t;\r
 993         enum {LEVEL_TYPE_CODE=CV_32SC1};\r
 994 \r
 995         typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
 996             const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
 997     };\r
 998     template<> struct NppHistogramRangeFuncC4<CV_32F>\r
 999     {\r
1000         typedef Npp32f src_t;\r
1001         typedef Npp32f level_t;\r
1002         enum {LEVEL_TYPE_CODE=CV_32FC1};\r
1003 \r
1004         typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],\r
1005             const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);\r
1006     };\r
1007 \r
1008     template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>\r
1009     struct NppHistogramRangeC1\r
1010     {\r
1011         typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;\r
1012         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
1013         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
1014 \r
1015         static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buffer, cudaStream_t stream)\r
1016         {\r
1017             CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);\r
1018 \r
1019             hist.create(1, levels.cols - 1, CV_32S);\r
1020 \r
1021             NppiSize sz;\r
1022             sz.width = src.cols;\r
1023             sz.height = src.rows;\r
1024 \r
1025             int buf_size;\r
1026             get_buf_size(sz, levels.cols, &buf_size);\r
1027             \r
1028             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
1029 \r
1030             NppStreamHandler h(stream);\r
1031 \r
1032             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );\r
1033 \r
1034             if (stream == 0)\r
1035                 cudaSafeCall( cudaDeviceSynchronize() );\r
1036         }\r
1037     };\r
1038     template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>\r
1039     struct NppHistogramRangeC4\r
1040     {\r
1041         typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;\r
1042         typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;\r
1043         enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};\r
1044 \r
1045         static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buffer, cudaStream_t stream)\r
1046         {\r
1047             CV_Assert(levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1);\r
1048             CV_Assert(levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1);\r
1049             CV_Assert(levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1);\r
1050             CV_Assert(levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1);\r
1051 \r
1052             hist[0].create(1, levels[0].cols - 1, CV_32S);\r
1053             hist[1].create(1, levels[1].cols - 1, CV_32S);\r
1054             hist[2].create(1, levels[2].cols - 1, CV_32S);\r
1055             hist[3].create(1, levels[3].cols - 1, CV_32S);\r
1056 \r
1057             Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};\r
1058             int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};\r
1059             const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};\r
1060 \r
1061             NppiSize sz;\r
1062             sz.width = src.cols;\r
1063             sz.height = src.rows;\r
1064 \r
1065             int buf_size;\r
1066             get_buf_size(sz, nLevels, &buf_size);\r
1067 \r
1068             ensureSizeIsEnough(1, buf_size, CV_8U, buffer);\r
1069 \r
1070             NppStreamHandler h(stream);\r
1071 \r
1072             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );\r
1073 \r
1074             if (stream == 0)\r
1075                 cudaSafeCall( cudaDeviceSynchronize() );\r
1076         }\r
1077     };\r
1078 }\r
1079 \r
1080 void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)\r
1081 {\r
1082     Mat host_levels(1, nLevels, CV_32SC1);\r
1083     nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );\r
1084     levels.upload(host_levels);\r
1085 }\r
1086 \r
1087 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
1088 {\r
1089     GpuMat buf;\r
1090     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
1091 }\r
1092 \r
1093 void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)\r
1094 {\r
1095     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );\r
1096 \r
1097     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, GpuMat& buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);\r
1098     static const hist_t hist_callers[] =\r
1099     {\r
1100         NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,\r
1101         0,\r
1102         NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,\r
1103         NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist\r
1104     };\r
1105 \r
1106     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1107 }\r
1108 \r
1109 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1110 {\r
1111     GpuMat buf;\r
1112     histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);\r
1113 }\r
1114 \r
1115 void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)\r
1116 {\r
1117     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );\r
1118 \r
1119     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);\r
1120     static const hist_t hist_callers[] =\r
1121     {\r
1122         NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,\r
1123         0,\r
1124         NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,\r
1125         NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist\r
1126     };\r
1127 \r
1128     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));\r
1129 }\r
1130 \r
1131 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream)\r
1132 {\r
1133     GpuMat buf;\r
1134     histRange(src, hist, levels, buf, stream);\r
1135 }\r
1136 \r
1137 \r
1138 void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)\r
1139 {\r
1140     CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);\r
1141 \r
1142     typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, cudaStream_t stream);\r
1143     static const hist_t hist_callers[] =\r
1144     {\r
1145         NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,\r
1146         0,\r
1147         NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,\r
1148         NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,\r
1149         0,\r
1150         NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist\r
1151     };\r
1152 \r
1153     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1154 }\r
1155 \r
1156 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)\r
1157 {\r
1158     GpuMat buf;\r
1159     histRange(src, hist, levels, buf, stream);\r
1160 }\r
1161 \r
1162 void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream)\r
1163 {\r
1164     CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);\r
1165 \r
1166     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, cudaStream_t stream);\r
1167     static const hist_t hist_callers[] =\r
1168     {\r
1169         NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,\r
1170         0,\r
1171         NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,\r
1172         NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,\r
1173         0,\r
1174         NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist\r
1175     };\r
1176 \r
1177     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));\r
1178 }\r
1179 \r
1180 namespace cv { namespace gpu { namespace histograms\r
1181 {\r
1182     void histogram256_gpu(DevMem2D src, int* hist, unsigned int* buf, cudaStream_t stream);\r
1183 \r
1184     const int PARTIAL_HISTOGRAM256_COUNT = 240;\r
1185     const int HISTOGRAM256_BIN_COUNT     = 256;\r
1186 }}}\r
1187 \r
1188 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)\r
1189 {\r
1190     GpuMat buf;\r
1191     calcHist(src, hist, buf, stream);\r
1192 }\r
1193 \r
1194 void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)\r
1195 {\r
1196     using namespace cv::gpu::histograms;\r
1197 \r
1198     CV_Assert(src.type() == CV_8UC1);\r
1199 \r
1200     hist.create(1, 256, CV_32SC1);\r
1201 \r
1202     ensureSizeIsEnough(1, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT, CV_32SC1, buf);\r
1203 \r
1204     histogram256_gpu(src, hist.ptr<int>(), buf.ptr<unsigned int>(), StreamAccessor::getStream(stream));\r
1205 }\r
1206 \r
1207 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)\r
1208 {\r
1209     GpuMat hist;\r
1210     GpuMat buf;\r
1211     equalizeHist(src, dst, hist, buf, stream);\r
1212 }\r
1213 \r
1214 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream)\r
1215 {\r
1216     GpuMat buf;\r
1217     equalizeHist(src, dst, hist, buf, stream);\r
1218 }\r
1219 \r
1220 namespace cv { namespace gpu { namespace histograms\r
1221 {\r
1222     void equalizeHist_gpu(DevMem2D src, DevMem2D dst, const int* lut, cudaStream_t stream);\r
1223 }}}\r
1224 \r
1225 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)\r
1226 {\r
1227     using namespace cv::gpu::histograms;\r
1228 \r
1229     CV_Assert(src.type() == CV_8UC1);\r
1230 \r
1231     dst.create(src.size(), src.type());\r
1232 \r
1233     int intBufSize;\r
1234     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );\r
1235 \r
1236     int bufSize = static_cast<int>(std::max(256 * 240 * sizeof(int), intBufSize + 256 * sizeof(int)));\r
1237 \r
1238     ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);\r
1239 \r
1240     GpuMat histBuf(1, 256 * 240, CV_32SC1, buf.ptr());\r
1241     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());\r
1242     GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);\r
1243 \r
1244     calcHist(src, hist, histBuf, s);\r
1245 \r
1246     cudaStream_t stream = StreamAccessor::getStream(s);\r
1247 \r
1248     NppStreamHandler h(stream);\r
1249 \r
1250     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );\r
1251     \r
1252     if (stream == 0)\r
1253         cudaSafeCall( cudaDeviceSynchronize() );\r
1254 \r
1255     equalizeHist_gpu(src, dst, lut.ptr<int>(), stream);\r
1256 }\r
1257 \r
1258 ////////////////////////////////////////////////////////////////////////\r
1259 // cornerHarris & minEgenVal\r
1260 \r
1261 namespace cv { namespace gpu { namespace imgproc {\r
1262 \r
1263     void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);\r
1264     void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1265     void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);\r
1266 \r
1267 }}}\r
1268 \r
1269 namespace \r
1270 {\r
1271     template <typename T>\r
1272     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1273     {   \r
1274         double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;\r
1275         if (ksize < 0) \r
1276             scale *= 2.;\r
1277         if (src.depth() == CV_8U)\r
1278             scale *= 255.;\r
1279         scale = 1./scale;\r
1280 \r
1281         Dx.create(src.size(), CV_32F);\r
1282         Dy.create(src.size(), CV_32F);\r
1283 \r
1284         if (ksize > 0)\r
1285         {\r
1286             Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);\r
1287             Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);\r
1288         }\r
1289         else\r
1290         {\r
1291             Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);\r
1292             Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);\r
1293         }\r
1294     }\r
1295 \r
1296     void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1297     {\r
1298         switch (src.type())\r
1299         {\r
1300         case CV_8U:\r
1301             extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);\r
1302             break;\r
1303         case CV_32F:\r
1304             extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);\r
1305             break;\r
1306         default:\r
1307             CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");\r
1308         }\r
1309     }\r
1310 \r
1311 } // Anonymous namespace\r
1312 \r
1313 \r
1314 bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)\r
1315 {\r
1316     switch (cpuBorderType)\r
1317     {\r
1318     case cv::BORDER_REFLECT101:\r
1319         gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;\r
1320         return true;\r
1321     case cv::BORDER_REPLICATE:\r
1322         gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
1323         return true;\r
1324     case cv::BORDER_CONSTANT:\r
1325         gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
1326         return true;\r
1327     case cv::BORDER_REFLECT:\r
1328         gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;\r
1329         return true;\r
1330     case cv::BORDER_WRAP:\r
1331         gpuBorderType = cv::gpu::BORDER_WRAP_GPU;\r
1332         return true;\r
1333     default:\r
1334         return false;\r
1335     };\r
1336     return false;\r
1337 }\r
1338 \r
1339 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)\r
1340 {\r
1341     GpuMat Dx, Dy;\r
1342     cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k, borderType);\r
1343 }\r
1344 \r
1345 void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType)\r
1346 {\r
1347     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1348               borderType == cv::BORDER_REPLICATE);\r
1349 \r
1350     int gpuBorderType;\r
1351     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1352 \r
1353     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);\r
1354     dst.create(src.size(), CV_32F);\r
1355     imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);\r
1356 }\r
1357 \r
1358 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)\r
1359 {  \r
1360     GpuMat Dx, Dy;\r
1361     cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize, borderType);\r
1362 }\r
1363 \r
1364 void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
1365 {  \r
1366     CV_Assert(borderType == cv::BORDER_REFLECT101 ||\r
1367               borderType == cv::BORDER_REPLICATE);\r
1368 \r
1369     int gpuBorderType;\r
1370     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1371 \r
1372     extractCovData(src, Dx, Dy, blockSize, ksize, borderType);    \r
1373     dst.create(src.size(), CV_32F);\r
1374     imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);\r
1375 }\r
1376 \r
1377 //////////////////////////////////////////////////////////////////////////////\r
1378 // mulSpectrums\r
1379 \r
1380 namespace cv { namespace gpu { namespace imgproc \r
1381 {\r
1382     void mulSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1383                       DevMem2D_<cufftComplex> c);\r
1384 \r
1385     void mulSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
1386                            DevMem2D_<cufftComplex> c);\r
1387 }}}\r
1388 \r
1389 \r
1390 void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, \r
1391                            int flags, bool conjB) \r
1392 {\r
1393     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>, \r
1394                            DevMem2D_<cufftComplex>);\r
1395     static Caller callers[] = { imgproc::mulSpectrums, \r
1396                                 imgproc::mulSpectrums_CONJ };\r
1397 \r
1398     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1399     CV_Assert(a.size() == b.size());\r
1400 \r
1401     c.create(a.size(), CV_32FC2);\r
1402 \r
1403     Caller caller = callers[(int)conjB];\r
1404     caller(a, b, c);\r
1405 }\r
1406 \r
1407 //////////////////////////////////////////////////////////////////////////////\r
1408 // mulAndScaleSpectrums\r
1409 \r
1410 namespace cv { namespace gpu { namespace imgproc \r
1411 {\r
1412     void mulAndScaleSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1413                              float scale, DevMem2D_<cufftComplex> c);\r
1414 \r
1415     void mulAndScaleSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
1416                                   float scale, DevMem2D_<cufftComplex> c);\r
1417 }}}\r
1418 \r
1419 \r
1420 void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,\r
1421                                   int flags, float scale, bool conjB) \r
1422 {\r
1423     typedef void (*Caller)(const PtrStep_<cufftComplex>, const PtrStep_<cufftComplex>,\r
1424                            float scale, DevMem2D_<cufftComplex>);\r
1425     static Caller callers[] = { imgproc::mulAndScaleSpectrums, \r
1426                                 imgproc::mulAndScaleSpectrums_CONJ };\r
1427 \r
1428     CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);\r
1429     CV_Assert(a.size() == b.size());\r
1430 \r
1431     c.create(a.size(), CV_32FC2);\r
1432 \r
1433     Caller caller = callers[(int)conjB];\r
1434     caller(a, b, scale, c);\r
1435 }\r
1436 \r
1437 //////////////////////////////////////////////////////////////////////////////\r
1438 // dft\r
1439 \r
1440 void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)\r
1441 {\r
1442     CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);\r
1443 \r
1444     // We don't support unpacked output (in the case of real input)\r
1445     CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));\r
1446 \r
1447     bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);\r
1448     int is_row_dft = flags & DFT_ROWS;\r
1449     int is_scaled_dft = flags & DFT_SCALE;\r
1450     int is_inverse = flags & DFT_INVERSE;\r
1451     bool is_complex_input = src.channels() == 2;\r
1452     bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
1453 \r
1454     // We don't support real-to-real transform\r
1455     CV_Assert(is_complex_input || is_complex_output);\r
1456 \r
1457     GpuMat src_data;\r
1458 \r
1459     // Make sure here we work with the continuous input, \r
1460     // as CUFFT can't handle gaps\r
1461     src_data = src;\r
1462     createContinuous(src.rows, src.cols, src.type(), src_data);\r
1463     if (src_data.data != src.data)\r
1464         src.copyTo(src_data);\r
1465 \r
1466     Size dft_size_opt = dft_size;\r
1467     if (is_1d_input && !is_row_dft)\r
1468     {\r
1469         // If the source matrix is single column handle it as single row\r
1470         dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
1471         dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
1472     }\r
1473 \r
1474     cufftType dft_type = CUFFT_R2C;\r
1475     if (is_complex_input) \r
1476         dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
1477 \r
1478     CV_Assert(dft_size_opt.width > 1);\r
1479 \r
1480     cufftHandle plan;\r
1481     if (is_1d_input || is_row_dft)\r
1482         cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
1483     else\r
1484         cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
1485 \r
1486     if (is_complex_input)\r
1487     {\r
1488         if (is_complex_output)\r
1489         {\r
1490             createContinuous(dft_size, CV_32FC2, dst);\r
1491             cufftSafeCall(cufftExecC2C(\r
1492                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),\r
1493                     is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
1494         }\r
1495         else\r
1496         {\r
1497             createContinuous(dft_size, CV_32F, dst);\r
1498             cufftSafeCall(cufftExecC2R(\r
1499                     plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));\r
1500         }\r
1501     }\r
1502     else\r
1503     {\r
1504         // We could swap dft_size for efficiency. Here we must reflect it\r
1505         if (dft_size == dft_size_opt)\r
1506             createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
1507         else\r
1508             createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
1509 \r
1510         cufftSafeCall(cufftExecR2C(\r
1511                 plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));\r
1512     }\r
1513 \r
1514     cufftSafeCall(cufftDestroy(plan));\r
1515 \r
1516     if (is_scaled_dft)\r
1517         multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
1518 }\r
1519 \r
1520 //////////////////////////////////////////////////////////////////////////////\r
1521 // convolve\r
1522 \r
1523 \r
1524 void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)\r
1525 {\r
1526     result_size = Size(image_size.width - templ_size.width + 1,\r
1527                        image_size.height - templ_size.height + 1);\r
1528     block_size = estimateBlockSize(result_size, templ_size);\r
1529 \r
1530     dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);\r
1531     dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1);\r
1532     createContinuous(dft_size, CV_32F, image_block);\r
1533     createContinuous(dft_size, CV_32F, templ_block);\r
1534     createContinuous(dft_size, CV_32F, result_data);\r
1535 \r
1536     spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
1537     createContinuous(1, spect_len, CV_32FC2, image_spect);\r
1538     createContinuous(1, spect_len, CV_32FC2, templ_spect);\r
1539     createContinuous(1, spect_len, CV_32FC2, result_spect);\r
1540 \r
1541     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);\r
1542     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);\r
1543 }\r
1544 \r
1545 \r
1546 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
1547 {\r
1548     int scale = 40;\r
1549     Size bsize_min(1024, 1024);\r
1550 \r
1551     // Check whether we use Fermi generation or newer GPU\r
1552     if (DeviceInfo().majorVersion() >= 2)\r
1553     {\r
1554         bsize_min.width = 2048;\r
1555         bsize_min.height = 2048;\r
1556     }\r
1557 \r
1558     Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
1559                std::max(templ_size.height * scale, bsize_min.height));\r
1560 \r
1561     bsize.width = std::min(bsize.width, result_size.width);\r
1562     bsize.height = std::min(bsize.height, result_size.height);\r
1563     return bsize;\r
1564 }\r
1565 \r
1566 \r
1567 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1568                        bool ccorr)\r
1569 {\r
1570     ConvolveBuf buf;\r
1571     convolve(image, templ, result, ccorr, buf);\r
1572 }\r
1573 \r
1574 \r
1575 void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, \r
1576                        bool ccorr, ConvolveBuf& buf)\r
1577 {\r
1578     StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();\r
1579     StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();\r
1580 \r
1581     CV_Assert(image.type() == CV_32F);\r
1582     CV_Assert(templ.type() == CV_32F);\r
1583 \r
1584     buf.create(image.size(), templ.size());\r
1585     result.create(buf.result_size, CV_32F);\r
1586 \r
1587     Size& block_size = buf.block_size;\r
1588     Size& dft_size = buf.dft_size;\r
1589 \r
1590     GpuMat& image_block = buf.image_block;\r
1591     GpuMat& templ_block = buf.templ_block;\r
1592     GpuMat& result_data = buf.result_data;\r
1593 \r
1594     GpuMat& image_spect = buf.image_spect;\r
1595     GpuMat& templ_spect = buf.templ_spect;\r
1596     GpuMat& result_spect = buf.result_spect;\r
1597 \r
1598     cufftHandle planR2C, planC2R;\r
1599     cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
1600     cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
1601 \r
1602     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
1603     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
1604                    templ_block.cols - templ_roi.cols, 0);\r
1605 \r
1606     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
1607                                templ_spect.ptr<cufftComplex>()));\r
1608 \r
1609     // Process all blocks of the result matrix\r
1610     for (int y = 0; y < result.rows; y += block_size.height)\r
1611     {\r
1612         for (int x = 0; x < result.cols; x += block_size.width)\r
1613         {\r
1614             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
1615                                 std::min(y + dft_size.height, image.rows) - y);\r
1616             GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
1617                              image.step);\r
1618             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
1619                            0, image_block.cols - image_roi.cols, 0);\r
1620 \r
1621             cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
1622                                        image_spect.ptr<cufftComplex>()));\r
1623             mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
1624                                  1.f / dft_size.area(), ccorr);\r
1625             cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
1626                                        result_data.ptr<cufftReal>()));\r
1627 \r
1628             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
1629                                  std::min(y + block_size.height, result.rows) - y);\r
1630             GpuMat result_roi(result_roi_size, result.type(), \r
1631                               (void*)(result.ptr<float>(y) + x), result.step);\r
1632             GpuMat result_block(result_roi_size, result_data.type(), \r
1633                                 result_data.ptr(), result_data.step);\r
1634             result_block.copyTo(result_roi);\r
1635         }\r
1636     }\r
1637 \r
1638     cufftSafeCall(cufftDestroy(planR2C));\r
1639     cufftSafeCall(cufftDestroy(planC2R));\r
1640 }\r
1641 \r
1642 //////////////////////////////////////////////////////////////////////////////\r
1643 // pyrDown\r
1644 \r
1645 namespace cv { namespace gpu { namespace imgproc\r
1646 {\r
1647     template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1648 }}}\r
1649 \r
1650 void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
1651 {\r
1652     using namespace cv::gpu::imgproc;\r
1653 \r
1654     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1655 \r
1656     static const func_t funcs[6][4] = \r
1657     {\r
1658         {pyrDown_gpu<uchar, 1>, pyrDown_gpu<uchar, 2>, pyrDown_gpu<uchar, 3>, pyrDown_gpu<uchar, 4>},\r
1659         {pyrDown_gpu<schar, 1>, pyrDown_gpu<schar, 2>, pyrDown_gpu<schar, 3>, pyrDown_gpu<schar, 4>},\r
1660         {pyrDown_gpu<ushort, 1>, pyrDown_gpu<ushort, 2>, pyrDown_gpu<ushort, 3>, pyrDown_gpu<ushort, 4>},\r
1661         {pyrDown_gpu<short, 1>, pyrDown_gpu<short, 2>, pyrDown_gpu<short, 3>, pyrDown_gpu<short, 4>},\r
1662         {pyrDown_gpu<int, 1>, pyrDown_gpu<int, 2>, pyrDown_gpu<int, 3>, pyrDown_gpu<int, 4>},\r
1663         {pyrDown_gpu<float, 1>, pyrDown_gpu<float, 2>, pyrDown_gpu<float, 3>, pyrDown_gpu<float, 4>},\r
1664     };\r
1665 \r
1666     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1667 \r
1668     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
1669     int gpuBorderType;\r
1670     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1671 \r
1672     dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
1673 \r
1674     funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
1675 }\r
1676 \r
1677 \r
1678 //////////////////////////////////////////////////////////////////////////////\r
1679 // pyrUp\r
1680 \r
1681 namespace cv { namespace gpu { namespace imgproc\r
1682 {\r
1683     template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1684 }}}\r
1685 \r
1686 void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
1687 {\r
1688     using namespace cv::gpu::imgproc;\r
1689 \r
1690     typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
1691 \r
1692     static const func_t funcs[6][4] = \r
1693     {\r
1694         {pyrUp_gpu<uchar, 1>, pyrUp_gpu<uchar, 2>, pyrUp_gpu<uchar, 3>, pyrUp_gpu<uchar, 4>},\r
1695         {pyrUp_gpu<schar, 1>, pyrUp_gpu<schar, 2>, pyrUp_gpu<schar, 3>, pyrUp_gpu<schar, 4>},\r
1696         {pyrUp_gpu<ushort, 1>, pyrUp_gpu<ushort, 2>, pyrUp_gpu<ushort, 3>, pyrUp_gpu<ushort, 4>},\r
1697         {pyrUp_gpu<short, 1>, pyrUp_gpu<short, 2>, pyrUp_gpu<short, 3>, pyrUp_gpu<short, 4>},\r
1698         {pyrUp_gpu<int, 1>, pyrUp_gpu<int, 2>, pyrUp_gpu<int, 3>, pyrUp_gpu<int, 4>},\r
1699         {pyrUp_gpu<float, 1>, pyrUp_gpu<float, 2>, pyrUp_gpu<float, 3>, pyrUp_gpu<float, 4>},\r
1700     };\r
1701 \r
1702     CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
1703 \r
1704     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
1705     int gpuBorderType;\r
1706     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
1707 \r
1708     dst.create(src.rows*2, src.cols*2, src.type());\r
1709 \r
1710     funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
1711 }\r
1712 \r
1713 \r
1714 //////////////////////////////////////////////////////////////////////////////\r
1715 // Canny\r
1716 \r
1717 cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)\r
1718 {\r
1719     CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());\r
1720 \r
1721     create(dx_.size(), -1);\r
1722 }\r
1723 \r
1724 void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)\r
1725 {\r
1726     ensureSizeIsEnough(image_size, CV_32SC1, dx);\r
1727     ensureSizeIsEnough(image_size, CV_32SC1, dy);\r
1728 \r
1729     if (apperture_size == 3)\r
1730     {\r
1731         ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);\r
1732         ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);\r
1733     }\r
1734     else if(apperture_size > 0)\r
1735     {\r
1736         if (!filterDX)\r
1737             filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);\r
1738         if (!filterDY)\r
1739             filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);\r
1740     }\r
1741 \r
1742     ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);\r
1743 \r
1744     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);\r
1745     ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);\r
1746 }\r
1747 \r
1748 void cv::gpu::CannyBuf::release()\r
1749 {\r
1750     dx.release();\r
1751     dy.release();\r
1752     dx_buf.release();\r
1753     dy_buf.release();\r
1754     edgeBuf.release();\r
1755     trackBuf1.release();\r
1756     trackBuf2.release();\r
1757 }\r
1758 \r
1759 namespace cv { namespace gpu { namespace canny\r
1760 {    \r
1761     void calcSobelRowPass_gpu(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);\r
1762 \r
1763     void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1764     void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);\r
1765 \r
1766     void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);\r
1767     \r
1768     void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);\r
1769 \r
1770     void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);\r
1771 \r
1772     void getEdges_gpu(PtrStepi map, PtrStep dst, int rows, int cols);\r
1773 }}}\r
1774 \r
1775 namespace\r
1776 {\r
1777     void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)\r
1778     {\r
1779         using namespace cv::gpu::canny;\r
1780 \r
1781         calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);\r
1782         \r
1783         edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);\r
1784         \r
1785         edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);\r
1786         \r
1787         getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);\r
1788     }\r
1789 }\r
1790 \r
1791 void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1792 {\r
1793     CannyBuf buf(src.size(), apperture_size);\r
1794     Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);\r
1795 }\r
1796 \r
1797 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)\r
1798 {\r
1799     using namespace cv::gpu::canny;\r
1800 \r
1801     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
1802     CV_Assert(src.type() == CV_8UC1);\r
1803 \r
1804     if( low_thresh > high_thresh )\r
1805         std::swap( low_thresh, high_thresh);\r
1806 \r
1807     dst.create(src.size(), CV_8U);\r
1808     dst.setTo(Scalar::all(0));\r
1809     \r
1810     buf.create(src.size(), apperture_size);\r
1811     buf.edgeBuf.setTo(Scalar::all(0));\r
1812 \r
1813     if (apperture_size == 3)\r
1814     {\r
1815         calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);\r
1816 \r
1817         calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1818     }\r
1819     else\r
1820     {\r
1821         buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));\r
1822         buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));\r
1823 \r
1824         calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);\r
1825     }\r
1826 \r
1827     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1828 }\r
1829 \r
1830 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1831 {\r
1832     CannyBuf buf(dx, dy);\r
1833     Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);\r
1834 }\r
1835 \r
1836 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)\r
1837 {\r
1838     using namespace cv::gpu::canny;\r
1839 \r
1840     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
1841     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());\r
1842 \r
1843     if( low_thresh > high_thresh )\r
1844         std::swap( low_thresh, high_thresh);\r
1845 \r
1846     dst.create(dx.size(), CV_8U);\r
1847     dst.setTo(Scalar::all(0));\r
1848     \r
1849     buf.dx = dx; buf.dy = dy;\r
1850     buf.create(dx.size(), -1);\r
1851     buf.edgeBuf.setTo(Scalar::all(0));\r
1852 \r
1853     calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);\r
1854 \r
1855     CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));\r
1856 }\r
1857 \r
1858 #endif /* !defined (HAVE_CUDA) */\r
1859 \r
1860 \r