modules/gpu/src/filtering.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44
  45 using namespace cv;
  46 using namespace cv::gpu;
  47
  48
  49 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
  50
  51 Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  52 Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  53 Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  54 Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
  55 Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
  56 Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
  57 Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  58 Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
  59 Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  60 Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  61 Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
  62 Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  63 Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }
  64 Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }
  65 Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  66 Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, GpuMat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  67 Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  68 Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int, GpuMat&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  69 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  70 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, GpuMat&, double, double, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
  71 Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
  72 Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
  73
  74 void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point, Stream&) { throw_nogpu(); }
  75 void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
  76 void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); }
  77 void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
  78 void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); }
  79 void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }
  80 void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_nogpu(); }
  81 void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_nogpu(); }
  82 void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); }
  83 void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_nogpu(); }
  84 void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); }
  85 void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, GpuMat&, int, double, int, int, Stream&) { throw_nogpu(); }
  86 void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { throw_nogpu(); }
  87 void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, GpuMat&, double, int, int, Stream&) { throw_nogpu(); }
  88 void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_nogpu(); }
  89 void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, GpuMat&, double, double, int, int, Stream&) { throw_nogpu(); }
  90 void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double, int, Stream&) { throw_nogpu(); }
  91
  92 #else
  93
  94 namespace
  95 {
  96     inline void normalizeAnchor(int& anchor, int ksize)
  97     {
  98         if (anchor < 0)
  99             anchor = ksize >> 1;
 100
 101         CV_Assert(0 <= anchor && anchor < ksize);
 102     }
 103
 104     inline void normalizeAnchor(Point& anchor, const Size& ksize)
 105     {
 106         normalizeAnchor(anchor.x, ksize.width);
 107         normalizeAnchor(anchor.y, ksize.height);
 108     }
 109
 110     inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size)
 111     {
 112         if (roi == Rect(0,0,-1,-1))
 113             roi = Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height);
 114
 115         CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);
 116     }
 117
 118     inline void normalizeKernel(const Mat& kernel, GpuMat& gpu_krnl, int type = CV_8U, int* nDivisor = 0, bool reverse = false)
 119     {
 120         int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
 121         if (nDivisor) *nDivisor = scale;
 122
 123         Mat temp(kernel.size(), type);
 124         kernel.convertTo(temp, type, scale);
 125         Mat cont_krnl = temp.reshape(1, 1);
 126
 127         if (reverse)
 128         {
 129             int count = cont_krnl.cols >> 1;
 130             for (int i = 0; i < count; ++i)
 131             {
 132                 std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
 133             }
 134         }
 135
 136         gpu_krnl.upload(cont_krnl);
 137     }
 138 }
 139
 140 ////////////////////////////////////////////////////////////////////////////////////////////////////
 141 // Filter2D
 142
 143 namespace
 144 {
 145     struct Filter2DEngine_GPU : public FilterEngine_GPU
 146     {
 147         Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int srcType_, int dstType_) :
 148             filter2D(filter2D_), srcType(srcType_), dstType(dstType_)
 149         {}
 150
 151         virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
 152         {
 153             CV_Assert(src.type() == srcType);
 154
 155             Size src_size = src.size();
 156
 157             dst.create(src_size, dstType);
 158
 159             if (roi.size() != src_size)
 160             {
 161                 if (stream)
 162                     stream.enqueueMemSet(dst, Scalar::all(0));
 163                 else
 164                     dst.setTo(Scalar::all(0));
 165             }
 166
 167             normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
 168
 169             GpuMat srcROI = src(roi);
 170             GpuMat dstROI = dst(roi);
 171
 172             (*filter2D)(srcROI, dstROI, stream);
 173         }
 174
 175         Ptr<BaseFilter_GPU> filter2D;
 176         int srcType, dstType;
 177     };
 178 }
 179
 180 Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType)
 181 {
 182     return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D, srcType, dstType));
 183 }
 184
 185 ////////////////////////////////////////////////////////////////////////////////////////////////////
 186 // SeparableFilter
 187
 188 namespace
 189 {
 190     struct SeparableFilterEngine_GPU : public FilterEngine_GPU
 191     {
 192         SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_,
 193                                   int srcType_, int bufType_, int dstType_) :
 194             rowFilter(rowFilter_), columnFilter(columnFilter_),
 195             srcType(srcType_), bufType(bufType_), dstType(dstType_)
 196         {
 197             ksize = Size(rowFilter->ksize, columnFilter->ksize);
 198             anchor = Point(rowFilter->anchor, columnFilter->anchor);
 199
 200             pbuf = &buf;
 201         }
 202
 203         SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_,
 204                                   int srcType_, int bufType_, int dstType_,
 205                                   GpuMat& buf_) :
 206             rowFilter(rowFilter_), columnFilter(columnFilter_),
 207             srcType(srcType_), bufType(bufType_), dstType(dstType_)
 208         {
 209             ksize = Size(rowFilter->ksize, columnFilter->ksize);
 210             anchor = Point(rowFilter->anchor, columnFilter->anchor);
 211
 212             pbuf = &buf_;
 213         }
 214
 215         virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
 216         {
 217             CV_Assert(src.type() == srcType);
 218
 219             Size src_size = src.size();
 220
 221             dst.create(src_size, dstType);
 222
 223             if (roi.size() != src_size)
 224             {
 225                 if (stream)
 226                     stream.enqueueMemSet(dst, Scalar::all(0));
 227                 else
 228                     dst.setTo(Scalar::all(0));
 229             }
 230
 231             ensureSizeIsEnough(src_size, bufType, *pbuf);
 232
 233             normalizeROI(roi, ksize, anchor, src_size);
 234
 235             GpuMat srcROI = src(roi);
 236             GpuMat dstROI = dst(roi);
 237             GpuMat bufROI = (*pbuf)(roi);
 238
 239             (*rowFilter)(srcROI, bufROI, stream);
 240             (*columnFilter)(bufROI, dstROI, stream);
 241         }
 242
 243         Ptr<BaseRowFilter_GPU> rowFilter;
 244         Ptr<BaseColumnFilter_GPU> columnFilter;
 245
 246         int srcType, bufType, dstType;
 247
 248         Size ksize;
 249         Point anchor;
 250
 251         GpuMat buf;
 252         GpuMat* pbuf;
 253     };
 254 }
 255
 256 Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
 257     const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType)
 258 {
 259     return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType));
 260 }
 261
 262 Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
 263     const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf)
 264 {
 265     return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf));
 266 }
 267
 268 ////////////////////////////////////////////////////////////////////////////////////////////////////
 269 // 1D Sum Filter
 270
 271 namespace
 272 {
 273     struct NppRowSumFilter : public BaseRowFilter_GPU
 274     {
 275         NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {}
 276
 277         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 278         {
 279             NppiSize sz;
 280             sz.width = src.cols;
 281             sz.height = src.rows;
 282
 283             cudaStream_t stream = StreamAccessor::getStream(s);
 284
 285             NppStreamHandler h(stream);
 286
 287             nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
 288                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
 289
 290             if (stream == 0)
 291                 cudaSafeCall( cudaDeviceSynchronize() );
 292         }
 293     };
 294 }
 295
 296 Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor)
 297 {
 298     CV_Assert(srcType == CV_8UC1 && sumType == CV_32FC1);
 299
 300     normalizeAnchor(anchor, ksize);
 301
 302     return Ptr<BaseRowFilter_GPU>(new NppRowSumFilter(ksize, anchor));
 303 }
 304
 305 namespace
 306 {
 307     struct NppColumnSumFilter : public BaseColumnFilter_GPU
 308     {
 309         NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {}
 310
 311         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 312         {
 313             NppiSize sz;
 314             sz.width = src.cols;
 315             sz.height = src.rows;
 316
 317             cudaStream_t stream = StreamAccessor::getStream(s);
 318
 319             NppStreamHandler h(stream);
 320
 321             nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
 322                 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
 323
 324             if (stream == 0)
 325                 cudaSafeCall( cudaDeviceSynchronize() );
 326         }
 327     };
 328 }
 329
 330 Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor)
 331 {
 332     CV_Assert(sumType == CV_8UC1 && dstType == CV_32FC1);
 333
 334     normalizeAnchor(anchor, ksize);
 335
 336     return Ptr<BaseColumnFilter_GPU>(new NppColumnSumFilter(ksize, anchor));
 337 }
 338
 339 ////////////////////////////////////////////////////////////////////////////////////////////////////
 340 // Box Filter
 341
 342 namespace
 343 {
 344     typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
 345         NppiSize oMaskSize, NppiPoint oAnchor);
 346
 347     struct NPPBoxFilter : public BaseFilter_GPU
 348     {
 349         NPPBoxFilter(const Size& ksize_, const Point& anchor_, nppFilterBox_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}
 350
 351         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 352         {
 353             NppiSize sz;
 354             sz.width = src.cols;
 355             sz.height = src.rows;
 356             NppiSize oKernelSize;
 357             oKernelSize.height = ksize.height;
 358             oKernelSize.width = ksize.width;
 359             NppiPoint oAnchor;
 360             oAnchor.x = anchor.x;
 361             oAnchor.y = anchor.y;
 362
 363             cudaStream_t stream = StreamAccessor::getStream(s);
 364
 365             NppStreamHandler h(stream);
 366
 367             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
 368                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
 369
 370             if (stream == 0)
 371                 cudaSafeCall( cudaDeviceSynchronize() );
 372         }
 373
 374         nppFilterBox_t func;
 375     };
 376 }
 377
 378 Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)
 379 {
 380     static const nppFilterBox_t nppFilterBox_callers[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};
 381
 382     CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
 383
 384     normalizeAnchor(anchor, ksize);
 385
 386     return Ptr<BaseFilter_GPU>(new NPPBoxFilter(ksize, anchor, nppFilterBox_callers[CV_MAT_CN(srcType)]));
 387 }
 388
 389 Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor)
 390 {
 391     Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor);
 392     return createFilter2D_GPU(boxFilter, srcType, dstType);
 393 }
 394
 395 void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor, Stream& stream)
 396 {
 397     int sdepth = src.depth(), cn = src.channels();
 398     if( ddepth < 0 )
 399         ddepth = sdepth;
 400
 401     dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
 402
 403     Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);
 404     f->apply(src, dst, Rect(0,0,-1,-1), stream);
 405 }
 406
 407 ////////////////////////////////////////////////////////////////////////////////////////////////////
 408 // Morphology Filter
 409
 410 namespace
 411 {
 412     typedef NppStatus (*nppMorfFilter_t)(const Npp8u*, Npp32s, Npp8u*, Npp32s, NppiSize, const Npp8u*, NppiSize, NppiPoint);
 413
 414     struct NPPMorphFilter : public BaseFilter_GPU
 415     {
 416         NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) :
 417             BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}
 418
 419         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 420         {
 421             NppiSize sz;
 422             sz.width = src.cols;
 423             sz.height = src.rows;
 424             NppiSize oKernelSize;
 425             oKernelSize.height = ksize.height;
 426             oKernelSize.width = ksize.width;
 427             NppiPoint oAnchor;
 428             oAnchor.x = anchor.x;
 429             oAnchor.y = anchor.y;
 430
 431             cudaStream_t stream = StreamAccessor::getStream(s);
 432
 433             NppStreamHandler h(stream);
 434
 435             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
 436                 dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
 437
 438             if (stream == 0)
 439                 cudaSafeCall( cudaDeviceSynchronize() );
 440         }
 441
 442         GpuMat kernel;
 443         nppMorfFilter_t func;
 444     };
 445 }
 446
 447 Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor)
 448 {
 449     static const nppMorfFilter_t nppMorfFilter_callers[2][5] =
 450     {
 451         {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
 452         {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
 453     };
 454
 455     CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
 456     CV_Assert(type == CV_8UC1 || type == CV_8UC4);
 457
 458     GpuMat gpu_krnl;
 459     normalizeKernel(kernel, gpu_krnl);
 460     normalizeAnchor(anchor, ksize);
 461
 462     return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, gpu_krnl, nppMorfFilter_callers[op][CV_MAT_CN(type)]));
 463 }
 464
 465 namespace
 466 {
 467     struct MorphologyFilterEngine_GPU : public FilterEngine_GPU
 468     {
 469         MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_) :
 470             filter2D(filter2D_), type(type_), iters(iters_)
 471         {
 472             pbuf = &buf;
 473         }
 474
 475         MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_, GpuMat& buf_) :
 476             filter2D(filter2D_), type(type_), iters(iters_)
 477         {
 478             pbuf = &buf_;
 479         }
 480
 481         virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null())
 482         {
 483             CV_Assert(src.type() == type);
 484
 485             Size src_size = src.size();
 486
 487             dst.create(src_size, type);
 488
 489             if (roi.size() != src_size)
 490             {
 491                 if (stream)
 492                     stream.enqueueMemSet(dst, Scalar::all(0));
 493                 else
 494                     dst.setTo(Scalar::all(0));
 495             }
 496
 497             normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);
 498
 499             if (iters > 1)
 500                 pbuf->create(src_size, type);
 501
 502             GpuMat srcROI = src(roi);
 503             GpuMat dstROI = dst(roi);
 504
 505             (*filter2D)(srcROI, dstROI, stream);
 506
 507             for(int i = 1; i < iters; ++i)
 508             {
 509                 dst.swap((*pbuf));
 510
 511                 dstROI = dst(roi);
 512                 GpuMat bufROI = (*pbuf)(roi);
 513
 514                 (*filter2D)(bufROI, dstROI, stream);
 515             }
 516         }
 517
 518         Ptr<BaseFilter_GPU> filter2D;
 519
 520         int type;
 521         int iters;
 522
 523         GpuMat buf;
 524         GpuMat* pbuf;
 525     };
 526 }
 527
 528 Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Point& anchor, int iterations)
 529 {
 530     CV_Assert(iterations > 0);
 531
 532     Size ksize = kernel.size();
 533
 534     Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);
 535
 536     return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, type, iterations));
 537 }
 538
 539 Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf, const Point& anchor, int iterations)
 540 {
 541     CV_Assert(iterations > 0);
 542
 543     Size ksize = kernel.size();
 544
 545     Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);
 546
 547     return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, type, iterations, buf));
 548 }
 549
 550 namespace
 551 {
 552     void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream = Stream::Null())
 553     {
 554         Mat kernel;
 555         Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
 556
 557         normalizeAnchor(anchor, ksize);
 558
 559         if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
 560         {
 561             if (stream)
 562                 stream.enqueueCopy(src, dst);
 563             else
 564                 src.copyTo(dst);
 565             return;
 566         }
 567
 568         dst.create(src.size(), src.type());
 569
 570         if (!_kernel.data)
 571         {
 572             kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2));
 573             anchor = Point(iterations, iterations);
 574             iterations = 1;
 575         }
 576         else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols)
 577         {
 578             anchor = Point(anchor.x * iterations, anchor.y * iterations);
 579             kernel = getStructuringElement(MORPH_RECT,
 580                                            Size(ksize.width + (iterations - 1) * (ksize.width - 1),
 581                                                 ksize.height + (iterations - 1) * (ksize.height - 1)),
 582                                            anchor);
 583             iterations = 1;
 584         }
 585         else
 586             kernel = _kernel;
 587
 588         Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, buf, anchor, iterations);
 589
 590         f->apply(src, dst, Rect(0,0,-1,-1), stream);
 591     }
 592
 593     void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int iterations)
 594     {
 595         GpuMat buf;
 596         morphOp(op, src, dst, _kernel, buf, anchor, iterations);
 597     }
 598 }
 599
 600 void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)
 601 {
 602     morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations);
 603 }
 604
 605 void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream)
 606 {
 607     morphOp(MORPH_ERODE, src, dst, kernel, buf, anchor, iterations, stream);
 608 }
 609
 610 void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)
 611 {
 612     morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations);
 613 }
 614
 615 void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream)
 616 {
 617     morphOp(MORPH_DILATE, src, dst, kernel, buf, anchor, iterations, stream);
 618 }
 619
 620 void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations)
 621 {
 622     GpuMat buf1;
 623     GpuMat buf2;
 624     morphologyEx(src, dst, op, kernel, buf1, buf2, anchor, iterations);
 625 }
 626
 627 void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor, int iterations, Stream& stream)
 628 {
 629     switch( op )
 630     {
 631     case MORPH_ERODE:   erode(src, dst, kernel, buf1, anchor, iterations, stream); break;
 632     case MORPH_DILATE: dilate(src, dst, kernel, buf1, anchor, iterations, stream); break;
 633     case MORPH_OPEN:
 634         erode(src, buf2, kernel, buf1, anchor, iterations, stream);
 635         dilate(buf2, dst, kernel, buf1, anchor, iterations, stream);
 636         break;
 637     case CV_MOP_CLOSE:
 638         dilate(src, buf2, kernel, buf1, anchor, iterations, stream);
 639         erode(buf2, dst, kernel, buf1, anchor, iterations, stream);
 640         break;
 641     case CV_MOP_GRADIENT:
 642         erode(src, buf2, kernel, buf1, anchor, iterations, stream);
 643         dilate(src, dst, kernel, buf1, anchor, iterations, stream);
 644         subtract(dst, buf2, dst, GpuMat(), -1, stream);
 645         break;
 646     case CV_MOP_TOPHAT:
 647         erode(src, dst, kernel, buf1, anchor, iterations, stream);
 648         dilate(dst, buf2, kernel, buf1, anchor, iterations, stream);
 649         subtract(src, buf2, dst, GpuMat(), -1, stream);
 650         break;
 651     case CV_MOP_BLACKHAT:
 652         dilate(src, dst, kernel, buf1, anchor, iterations, stream);
 653         erode(dst, buf2, kernel, buf1, anchor, iterations, stream);
 654         subtract(buf2, src, dst, GpuMat(), -1, stream);
 655         break;
 656     default:
 657         CV_Error(CV_StsBadArg, "unknown morphological operation");
 658     }
 659 }
 660
 661 ////////////////////////////////////////////////////////////////////////////////////////////////////
 662 // Linear Filter
 663
 664 namespace cv { namespace gpu { namespace device
 665 {
 666     namespace imgproc
 667     {
 668         template <typename T, typename D>
 669         void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
 670                           int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
 671                           int borderMode, const float* borderValue, cudaStream_t stream);
 672     }
 673 }}}
 674
 675 namespace
 676 {
 677     typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
 678         const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
 679
 680     struct NPPLinearFilter : public BaseFilter_GPU
 681     {
 682         NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) :
 683             BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 684
 685         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 686         {
 687             NppiSize sz;
 688             sz.width = src.cols;
 689             sz.height = src.rows;
 690             NppiSize oKernelSize;
 691             oKernelSize.height = ksize.height;
 692             oKernelSize.width = ksize.width;
 693             NppiPoint oAnchor;
 694             oAnchor.x = anchor.x;
 695             oAnchor.y = anchor.y;
 696
 697             cudaStream_t stream = StreamAccessor::getStream(s);
 698
 699             NppStreamHandler h(stream);
 700
 701             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
 702                 kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
 703
 704             if (stream == 0)
 705                 cudaSafeCall( cudaDeviceSynchronize() );
 706         }
 707
 708         GpuMat kernel;
 709         Npp32s nDivisor;
 710         nppFilter2D_t func;
 711     };
 712
 713     typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
 714                                    int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
 715                                    int borderMode, const float* borderValue, cudaStream_t stream);
 716
 717     struct GpuFilter2D : public BaseFilter_GPU
 718     {
 719         GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) :
 720             BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_)
 721         {
 722         }
 723
 724         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
 725         {
 726             using namespace cv::gpu::device::imgproc;
 727
 728             Point ofs;
 729             Size wholeSize;
 730             src.locateROI(wholeSize, ofs);
 731             GpuMat srcWhole(wholeSize, src.type(), src.datastart);
 732
 733             static const Scalar_<float> zero = Scalar_<float>::all(0.0f);
 734             func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), brd_type, zero.val, StreamAccessor::getStream(stream));
 735         }
 736
 737         gpuFilter2D_t func;
 738         GpuMat kernel;
 739         int brd_type;
 740     };
 741 }
 742
 743 Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type)
 744 {
 745     using namespace cv::gpu::device::imgproc;
 746
 747     int sdepth = CV_MAT_DEPTH(srcType);
 748     int scn = CV_MAT_CN(srcType);
 749
 750     CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F);
 751     CV_Assert(scn == 1 || scn == 4);
 752     CV_Assert(dstType == srcType);
 753     CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP);
 754
 755     Size ksize = kernel.size();
 756
 757 #if 0
 758     if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT)
 759     {
 760         static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
 761
 762         GpuMat gpu_krnl;
 763         int nDivisor;
 764         normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
 765
 766         normalizeAnchor(anchor, ksize);
 767
 768         return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
 769     }
 770 #endif
 771
 772     CV_Assert(ksize.width * ksize.height <= 16 * 16);
 773
 774     int gpuBorderType;
 775     CV_Assert( tryConvertToGpuBorderType(brd_type, gpuBorderType) );
 776
 777     GpuMat gpu_krnl;
 778     normalizeKernel(kernel, gpu_krnl, CV_32F);
 779
 780     normalizeAnchor(anchor, ksize);
 781
 782     gpuFilter2D_t func = 0;
 783
 784     switch (srcType)
 785     {
 786     case CV_8UC1:
 787         func = filter2D_gpu<uchar, uchar>;
 788         break;
 789     case CV_8UC4:
 790         func = filter2D_gpu<uchar4, uchar4>;
 791         break;
 792     case CV_16UC1:
 793         func = filter2D_gpu<ushort, ushort>;
 794         break;
 795     case CV_16UC4:
 796         func = filter2D_gpu<ushort4, ushort4>;
 797         break;
 798     case CV_32FC1:
 799         func = filter2D_gpu<float, float>;
 800         break;
 801     case CV_32FC4:
 802         func = filter2D_gpu<float4, float4>;
 803         break;
 804     }
 805
 806     return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, gpuBorderType));
 807 }
 808
 809 Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType)
 810 {
 811     Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType);
 812
 813     return createFilter2D_GPU(linearFilter, srcType, dstType);
 814 }
 815
 816 void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream)
 817 {
 818     if (ddepth < 0)
 819         ddepth = src.depth();
 820
 821     int dst_type = CV_MAKE_TYPE(ddepth, src.channels());
 822
 823     Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType);
 824
 825     dst.create(src.size(), dst_type);
 826
 827     f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
 828 }
 829
 830 ////////////////////////////////////////////////////////////////////////////////////////////////////
 831 // Separable Linear Filter
 832
 833 namespace cv { namespace gpu { namespace device
 834 {
 835     namespace row_filter
 836     {
 837         template <typename T, typename D>
 838         void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
 839     }
 840
 841     namespace column_filter
 842     {
 843         template <typename T, typename D>
 844         void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
 845     }
 846 }}}
 847
 848 namespace
 849 {
 850     typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
 851         const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
 852
 853     typedef void (*gpuFilter1D_t)(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
 854
 855     struct NppLinearRowFilter : public BaseRowFilter_GPU
 856     {
 857         NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
 858             BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 859
 860         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 861         {
 862             NppiSize sz;
 863             sz.width = src.cols;
 864             sz.height = src.rows;
 865
 866             cudaStream_t stream = StreamAccessor::getStream(s);
 867
 868             NppStreamHandler h(stream);
 869
 870             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
 871                 kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
 872
 873             if (stream == 0)
 874                 cudaSafeCall( cudaDeviceSynchronize() );
 875         }
 876
 877         GpuMat kernel;
 878         Npp32s nDivisor;
 879         nppFilter1D_t func;
 880     };
 881
 882     struct GpuLinearRowFilter : public BaseRowFilter_GPU
 883     {
 884         GpuLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) :
 885             BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
 886
 887         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 888         {
 889             DeviceInfo devInfo;
 890             int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
 891             func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
 892         }
 893
 894         GpuMat kernel;
 895         gpuFilter1D_t func;
 896         int brd_type;
 897     };
 898 }
 899
 900 Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
 901 {
 902     using namespace ::cv::gpu::device::row_filter;
 903
 904     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
 905
 906     if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
 907     {
 908         CV_Assert(borderType == BORDER_CONSTANT);
 909
 910         GpuMat gpu_row_krnl;
 911         int nDivisor;
 912         normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);
 913
 914         int ksize = gpu_row_krnl.cols;
 915         normalizeAnchor(anchor, ksize);
 916
 917         return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,
 918             nppFilter1D_callers[CV_MAT_CN(srcType)]));
 919     }
 920
 921     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
 922     int gpuBorderType;
 923     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
 924
 925     CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4);
 926
 927     CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
 928
 929     GpuMat gpu_row_krnl;
 930     normalizeKernel(rowKernel, gpu_row_krnl, CV_32F);
 931
 932     int ksize = gpu_row_krnl.cols;
 933
 934     CV_Assert(ksize > 0 && ksize <= 32);
 935
 936     normalizeAnchor(anchor, ksize);
 937
 938     gpuFilter1D_t func = 0;
 939
 940     switch (srcType)
 941     {
 942     case CV_8UC1:
 943         func = linearRowFilter_gpu<uchar, float>;
 944         break;
 945     case CV_8UC3:
 946         func = linearRowFilter_gpu<uchar3, float3>;
 947         break;
 948     case CV_8UC4:
 949         func = linearRowFilter_gpu<uchar4, float4>;
 950         break;
 951     case CV_16SC3:
 952         func = linearRowFilter_gpu<short3, float3>;
 953         break;
 954     case CV_32SC1:
 955         func = linearRowFilter_gpu<int, float>;
 956         break;
 957     case CV_32FC1:
 958         func = linearRowFilter_gpu<float, float>;
 959         break;
 960     case CV_32FC3:
 961         func = linearRowFilter_gpu<float3, float3>;
 962         break;
 963     case CV_32FC4:
 964         func = linearRowFilter_gpu<float4, float4>;
 965         break;
 966     }
 967
 968     return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, gpuBorderType));
 969 }
 970
 971 namespace
 972 {
 973     struct NppLinearColumnFilter : public BaseColumnFilter_GPU
 974     {
 975         NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
 976             BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
 977
 978         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
 979         {
 980             NppiSize sz;
 981             sz.width = src.cols;
 982             sz.height = src.rows;
 983
 984             cudaStream_t stream = StreamAccessor::getStream(s);
 985
 986             NppStreamHandler h(stream);
 987
 988             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
 989                 kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
 990
 991             if (stream == 0)
 992                 cudaSafeCall( cudaDeviceSynchronize() );
 993         }
 994
 995         GpuMat kernel;
 996         Npp32s nDivisor;
 997         nppFilter1D_t func;
 998     };
 999
1000     struct GpuLinearColumnFilter : public BaseColumnFilter_GPU
1001     {
1002         GpuLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) :
1003             BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
1004
1005         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
1006         {
1007             DeviceInfo devInfo;
1008             int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
1009             if (ksize > 16 && cc < 20)
1010                 CV_Error(CV_StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");
1011
1012             func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
1013         }
1014
1015         GpuMat kernel;
1016         gpuFilter1D_t func;
1017         int brd_type;
1018     };
1019 }
1020
1021 Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
1022 {
1023     using namespace ::cv::gpu::device::column_filter;
1024
1025     static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
1026
1027     if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
1028     {
1029         CV_Assert(borderType == BORDER_CONSTANT);
1030
1031         GpuMat gpu_col_krnl;
1032         int nDivisor;
1033         normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);
1034
1035         int ksize = gpu_col_krnl.cols;
1036         normalizeAnchor(anchor, ksize);
1037
1038         return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,
1039             nppFilter1D_callers[CV_MAT_CN(bufType)]));
1040     }
1041
1042     CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
1043     int gpuBorderType;
1044     CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
1045
1046     CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC3 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1 || dstType == CV_32FC3 || dstType == CV_32FC4);
1047
1048     CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
1049
1050     GpuMat gpu_col_krnl;
1051     normalizeKernel(columnKernel, gpu_col_krnl, CV_32F);
1052
1053     int ksize = gpu_col_krnl.cols;
1054
1055     CV_Assert(ksize > 0 && ksize <= 32);
1056
1057     normalizeAnchor(anchor, ksize);
1058
1059     gpuFilter1D_t func = 0;
1060
1061     switch (dstType)
1062     {
1063     case CV_8UC1:
1064         func = linearColumnFilter_gpu<float, uchar>;
1065         break;
1066     case CV_8UC3:
1067         func = linearColumnFilter_gpu<float3, uchar3>;
1068         break;
1069     case CV_8UC4:
1070         func = linearColumnFilter_gpu<float4, uchar4>;
1071         break;
1072     case CV_16SC3:
1073         func = linearColumnFilter_gpu<float3, short3>;
1074         break;
1075     case CV_32SC1:
1076         func = linearColumnFilter_gpu<float, int>;
1077         break;
1078     case CV_32FC1:
1079         func = linearColumnFilter_gpu<float, float>;
1080         break;
1081     case CV_32FC3:
1082         func = linearColumnFilter_gpu<float3, float3>;
1083         break;
1084     case CV_32FC4:
1085         func = linearColumnFilter_gpu<float4, float4>;
1086         break;
1087     }
1088
1089     return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, gpuBorderType));
1090 }
1091
1092 Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,
1093     const Point& anchor, int rowBorderType, int columnBorderType)
1094 {
1095     if (columnBorderType < 0)
1096         columnBorderType = rowBorderType;
1097
1098     int cn = CV_MAT_CN(srcType);
1099     int bdepth = CV_32F;
1100     int bufType = CV_MAKETYPE(bdepth, cn);
1101
1102     Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType);
1103     Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType);
1104
1105     return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType);
1106 }
1107
1108 Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, GpuMat& buf,
1109     const Point& anchor, int rowBorderType, int columnBorderType)
1110 {
1111     if (columnBorderType < 0)
1112         columnBorderType = rowBorderType;
1113
1114     int cn = CV_MAT_CN(srcType);
1115     int bdepth = CV_32F;
1116     int bufType = CV_MAKETYPE(bdepth, cn);
1117
1118     Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType);
1119     Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType);
1120
1121     return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf);
1122 }
1123
1124 void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
1125                           Point anchor, int rowBorderType, int columnBorderType)
1126 {
1127     if( ddepth < 0 )
1128         ddepth = src.depth();
1129
1130     dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
1131
1132     Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowBorderType, columnBorderType);
1133     f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
1134 }
1135
1136 void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
1137                           Point anchor, int rowBorderType, int columnBorderType,
1138                           Stream& stream)
1139 {
1140     if( ddepth < 0 )
1141         ddepth = src.depth();
1142
1143     dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
1144
1145     Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, buf, anchor, rowBorderType, columnBorderType);
1146     f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
1147 }
1148
1149 ////////////////////////////////////////////////////////////////////////////////////////////////////
1150 // Deriv Filter
1151
1152 Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType, int columnBorderType)
1153 {
1154     Mat kx, ky;
1155     getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
1156     return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
1157 }
1158
1159 Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf, int rowBorderType, int columnBorderType)
1160 {
1161     Mat kx, ky;
1162     getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
1163     return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType);
1164 }
1165
1166 void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType)
1167 {
1168     GpuMat buf;
1169     Sobel(src, dst, ddepth, dx, dy, buf, ksize, scale, rowBorderType, columnBorderType);
1170 }
1171
1172 void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize, double scale, int rowBorderType, int columnBorderType, Stream& stream)
1173 {
1174     Mat kx, ky;
1175     getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
1176
1177     if (scale != 1)
1178     {
1179         // usually the smoothing part is the slowest to compute,
1180         // so try to scale it instead of the faster differenciating part
1181         if (dx == 0)
1182             kx *= scale;
1183         else
1184             ky *= scale;
1185     }
1186
1187     sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
1188 }
1189
1190 void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType)
1191 {
1192     GpuMat buf;
1193     Scharr(src, dst, ddepth, dx, dy, buf, scale, rowBorderType, columnBorderType);
1194 }
1195
1196 void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale, int rowBorderType, int columnBorderType, Stream& stream)
1197 {
1198     Mat kx, ky;
1199     getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);
1200
1201     if( scale != 1 )
1202     {
1203         // usually the smoothing part is the slowest to compute,
1204         // so try to scale it instead of the faster differenciating part
1205         if( dx == 0 )
1206             kx *= scale;
1207         else
1208             ky *= scale;
1209     }
1210
1211     sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
1212 }
1213
1214 void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream)
1215 {
1216     CV_Assert(ksize == 1 || ksize == 3);
1217
1218     static const int K[2][9] =
1219     {
1220         {0, 1, 0, 1, -4, 1, 0, 1, 0},
1221         {2, 0, 2, 0, -8, 0, 2, 0, 2}
1222     };
1223     Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
1224     if (scale != 1)
1225         kernel *= scale;
1226
1227     filter2D(src, dst, ddepth, kernel, Point(-1,-1), borderType, stream);
1228 }
1229
1230 ////////////////////////////////////////////////////////////////////////////////////////////////////
1231 // Gaussian Filter
1232
1233 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
1234 {
1235     int depth = CV_MAT_DEPTH(type);
1236
1237     if (sigma2 <= 0)
1238         sigma2 = sigma1;
1239
1240     // automatic detection of kernel size from sigma
1241     if (ksize.width <= 0 && sigma1 > 0)
1242         ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
1243     if (ksize.height <= 0 && sigma2 > 0)
1244         ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
1245
1246     CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );
1247
1248     sigma1 = std::max(sigma1, 0.0);
1249     sigma2 = std::max(sigma2, 0.0);
1250
1251     Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) );
1252     Mat ky;
1253     if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )
1254         ky = kx;
1255     else
1256         ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );
1257
1258     return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);
1259 }
1260
1261 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
1262 {
1263     int depth = CV_MAT_DEPTH(type);
1264
1265     if (sigma2 <= 0)
1266         sigma2 = sigma1;
1267
1268     // automatic detection of kernel size from sigma
1269     if (ksize.width <= 0 && sigma1 > 0)
1270         ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
1271     if (ksize.height <= 0 && sigma2 > 0)
1272         ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;
1273
1274     CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );
1275
1276     sigma1 = std::max(sigma1, 0.0);
1277     sigma2 = std::max(sigma2, 0.0);
1278
1279     Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) );
1280     Mat ky;
1281     if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )
1282         ky = kx;
1283     else
1284         ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );
1285
1286     return createSeparableLinearFilter_GPU(type, type, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType);
1287 }
1288
1289 void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
1290 {
1291     if (ksize.width == 1 && ksize.height == 1)
1292     {
1293         src.copyTo(dst);
1294         return;
1295     }
1296
1297     dst.create(src.size(), src.type());
1298
1299     Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType);
1300     f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
1301 }
1302
1303 void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2, int rowBorderType, int columnBorderType, Stream& stream)
1304 {
1305     if (ksize.width == 1 && ksize.height == 1)
1306     {
1307         src.copyTo(dst);
1308         return;
1309     }
1310
1311     dst.create(src.size(), src.type());
1312
1313     Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, buf, sigma1, sigma2, rowBorderType, columnBorderType);
1314     f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
1315 }
1316
1317 ////////////////////////////////////////////////////////////////////////////////////////////////////
1318 // Image Rank Filter
1319
1320 namespace
1321 {
1322     typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
1323         NppiSize oMaskSize, NppiPoint oAnchor);
1324
1325     struct NPPRankFilter : public BaseFilter_GPU
1326     {
1327         NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}
1328
1329         virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
1330         {
1331             NppiSize sz;
1332             sz.width = src.cols;
1333             sz.height = src.rows;
1334             NppiSize oKernelSize;
1335             oKernelSize.height = ksize.height;
1336             oKernelSize.width = ksize.width;
1337             NppiPoint oAnchor;
1338             oAnchor.x = anchor.x;
1339             oAnchor.y = anchor.y;
1340
1341             cudaStream_t stream = StreamAccessor::getStream(s);
1342
1343             NppStreamHandler h(stream);
1344
1345             nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
1346
1347             if (stream == 0)
1348                 cudaSafeCall( cudaDeviceSynchronize() );
1349         }
1350
1351         nppFilterRank_t func;
1352     };
1353 }
1354
1355 Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)
1356 {
1357     static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};
1358
1359     CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
1360
1361     normalizeAnchor(anchor, ksize);
1362
1363     return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));
1364 }
1365
1366 Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)
1367 {
1368     static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};
1369
1370     CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
1371
1372     normalizeAnchor(anchor, ksize);
1373
1374     return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));
1375 }
1376
1377 #endif