From 3f7cde0485fd23b8882e560ed53d6bd401e84cc3 Mon Sep 17 00:00:00 2001 From: Naba Kumar Date: Mon, 12 Dec 2016 12:15:31 +0200 Subject: [PATCH] Implement 32f support for morphology operation --- modules/cudafilters/src/filtering.cpp | 55 +++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/modules/cudafilters/src/filtering.cpp b/modules/cudafilters/src/filtering.cpp index fcf5452..21efde0 100644 --- a/modules/cudafilters/src/filtering.cpp +++ b/modules/cudafilters/src/filtering.cpp @@ -525,14 +525,17 @@ namespace void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); private: - typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI, - const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor); + typedef NppStatus (*nppMorfFilter8u_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI, + const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor); + typedef NppStatus (*nppMorfFilter32f_t)(const Npp32f* pSrc, Npp32s nSrcStep, Npp32f* pDst, Npp32s nDstStep, NppiSize oSizeROI, + const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor); int type_; GpuMat kernel_; Point anchor_; int iters_; - nppMorfFilter_t func_; + nppMorfFilter8u_t func8u_; + nppMorfFilter32f_t func32f_; GpuMat srcBorder_; GpuMat buf_; @@ -541,14 +544,19 @@ namespace MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) : type_(srcType), anchor_(anchor), iters_(iterations) { - static const nppMorfFilter_t funcs[2][5] = + static const nppMorfFilter8u_t funcs8u[2][5] = { {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R }, {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R } }; + static const nppMorfFilter32f_t funcs32f[2][5] = + { + {0, nppiErode_32f_C1R, 0, 0, nppiErode_32f_C4R }, + {0, nppiDilate_32f_C1R, 0, 0, nppiDilate_32f_C4R } + }; CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); - CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 ); + CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4 ); Mat kernel = _kernel.getMat(); Size ksize = !kernel.empty() ? _kernel.size() : Size(3, 3); @@ -579,7 +587,14 @@ namespace kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1); kernel_.upload(kernel8U); - func_ = funcs[op][CV_MAT_CN(srcType)]; + if(srcType == CV_8UC1 || srcType == CV_8UC4) + { + func8u_ = funcs8u[op][CV_MAT_CN(srcType)]; + } + else if(srcType == CV_32FC1 || srcType == CV_32FC4) + { + func32f_ = funcs32f[op][CV_MAT_CN(srcType)]; + } } void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) @@ -618,15 +633,31 @@ namespace oAnchor.x = anchor_.x; oAnchor.y = anchor_.y; - nppSafeCall( func_(srcRoi.ptr(), static_cast(srcRoi.step), dst.ptr(), static_cast(dst.step), - oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + if (type_ == CV_8UC1 || type_ == CV_8UC4) + { + nppSafeCall( func8u_(srcRoi.ptr(), static_cast(srcRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); - for(int i = 1; i < iters_; ++i) + for(int i = 1; i < iters_; ++i) + { + dst.copyTo(bufRoi, _stream); + + nppSafeCall( func8u_(bufRoi.ptr(), static_cast(bufRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + } + } + else if (type_ == CV_32FC1 || type_ == CV_32FC4) { - dst.copyTo(bufRoi, _stream); + nppSafeCall( func32f_(srcRoi.ptr(), static_cast(srcRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + + for(int i = 1; i < iters_; ++i) + { + dst.copyTo(bufRoi, _stream); - nppSafeCall( func_(bufRoi.ptr(), static_cast(bufRoi.step), dst.ptr(), static_cast(dst.step), - oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + nppSafeCall( func32f_(bufRoi.ptr(), static_cast(bufRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + } } if (stream == 0) -- 2.7.4