From 8ed47c01b7c0aa6fd48acaac38c063cd91f856e9 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Tue, 1 Oct 2013 15:28:51 +0400
Subject: [PATCH] used new device layer for cv::cuda::norm

---
 modules/cudaarithm/src/cuda/norm.cu   | 119 ++++++++++++++++++++++++++++++++++
 modules/cudaarithm/src/reductions.cpp |  53 ---------------
 2 files changed, 119 insertions(+), 53 deletions(-)
 create mode 100644 modules/cudaarithm/src/cuda/norm.cu

diff --git a/modules/cudaarithm/src/cuda/norm.cu b/modules/cudaarithm/src/cuda/norm.cu
new file mode 100644
index 0000000..bda6b45
--- /dev/null
+++ b/modules/cudaarithm/src/cuda/norm.cu
@@ -0,0 +1,119 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv::cudev;
+
+namespace
+{
+    double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+
+        gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
+
+        int data[2];
+        buf.download(cv::Mat(1, 2, buf.type(), data));
+
+        return data[1];
+    }
+
+    double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+
+        gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
+
+        int data;
+        buf.download(cv::Mat(1, 1, buf.type(), &data));
+
+        return data;
+    }
+
+    double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
+
+        gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
+
+        double data;
+        buf.download(cv::Mat(1, 1, buf.type(), &data));
+
+        return std::sqrt(data);
+    }
+}
+
+double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
+{
+    typedef double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf);
+    static const func_t funcs[] =
+    {
+        0, normDiffInf, normDiffL1, 0, normDiffL2
+    };
+
+    GpuMat src1 = _src1.getGpuMat();
+    GpuMat src2 = _src2.getGpuMat();
+
+    CV_Assert( src1.type() == CV_8UC1 );
+    CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
+    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
+
+    return funcs[normType](src1, src2, buf);
+}
+
+#endif
diff --git a/modules/cudaarithm/src/reductions.cpp b/modules/cudaarithm/src/reductions.cpp
index 5a4a2df..c1e2af4 100644
--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@@ -133,59 +133,6 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& b
     return std::max(std::abs(min_val), std::abs(max_val));
 }
 
-double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
-{
-#if CUDA_VERSION < 5050
-    (void) buf;
-
-    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
-
-    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
-#else
-    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-        NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
-
-    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
-
-    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
-
-    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
-#endif
-
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
-
-    CV_Assert( src1.type() == CV_8UC1 );
-    CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
-    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
-
-    NppiSize sz;
-    sz.width  = src1.cols;
-    sz.height = src1.rows;
-
-    const int funcIdx = normType >> 1;
-
-    DeviceBuffer dbuf;
-
-#if CUDA_VERSION < 5050
-    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
-#else
-    int bufSize;
-    buf_size_funcs[funcIdx](sz, &bufSize);
-
-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
-
-    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
-#endif
-
-    cudaSafeCall( cudaDeviceSynchronize() );
-
-    double retVal;
-    dbuf.download(&retVal);
-
-    return retVal;
-}
-
 ////////////////////////////////////////////////////////////////////////
 // meanStdDev
 
-- 
2.7.4
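
Usage note (not part of the patch): a minimal sketch of how the buffered
cv::cuda::norm(src1, src2, buf, normType) overload touched above could be called
from client code. The image size, the synthetic random data, and the NORM_L2
choice are illustrative assumptions; this overload only accepts CV_8UC1 inputs.

    // Compare two 8-bit single-channel GPU images with the buffered overload.
    // "buf" is the reusable scratch buffer the reduction writes into.
    #include <iostream>
    #include <opencv2/core.hpp>
    #include <opencv2/cudaarithm.hpp>

    int main()
    {
        cv::Mat h_a(480, 640, CV_8UC1), h_b(480, 640, CV_8UC1);
        cv::randu(h_a, 0, 256);   // synthetic test data
        cv::randu(h_b, 0, 256);

        cv::cuda::GpuMat d_a(h_a), d_b(h_b);   // upload to the device
        cv::cuda::GpuMat buf;                  // scratch buffer, reused across calls

        // L2 norm of the per-pixel difference, now computed via the device layer.
        double dist = cv::cuda::norm(d_a, d_b, buf, cv::NORM_L2);
        std::cout << "||a - b||_2 = " << dist << std::endl;
        return 0;
    }

Keeping buf alive across calls avoids reallocating the reduction buffer each time,
which is the point of the buffered overload retained by this change.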