From 766d950ff32223525bda4df330d55e567099feb0 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 26 Aug 2013 10:11:29 +0400 Subject: [PATCH] used new device layer in math per element operations --- modules/cudaarithm/src/cuda/math.cu | 333 +++++++++++++++----------- modules/cudaarithm/src/element_operations.cpp | 264 -------------------- 2 files changed, 195 insertions(+), 402 deletions(-) diff --git a/modules/cudaarithm/src/cuda/math.cu b/modules/cudaarithm/src/cuda/math.cu index ecd9a8c..39f8220 100644 --- a/modules/cudaarithm/src/cuda/math.cu +++ b/modules/cudaarithm/src/cuda/math.cu @@ -40,196 +40,248 @@ // //M*/ -#if !defined CUDA_DISABLER +#include "opencv2/opencv_modules.hpp" -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" -#include "opencv2/core/cuda/limits.hpp" -#include "opencv2/core/cuda/type_traits.hpp" +#ifndef HAVE_OPENCV_CUDEV -#include "arithm_func_traits.hpp" +#error "opencv_cudev is required" -using namespace cv::cuda; -using namespace cv::cuda::device; +#else -////////////////////////////////////////////////////////////////////////// -// absMat +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" -namespace cv { namespace cuda { namespace device +using namespace cv::cudev; + +namespace { - template struct TransformFunctorTraits< abs_func > : arithm::ArithmFuncTraits + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy { + enum { + shift = 1 + }; }; -}}} +} + +////////////////////////////////////////////////////////////////////////////// +/// abs -namespace arithm +namespace { template - void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) + void absMat(const GpuMat& src, const GpuMat& dst, Stream& stream) { - device::transform((PtrStepSz) src, (PtrStepSz) dst, abs_func(), WithOutMask(), stream); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), abs_func(), stream); } +} + +void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = + { + absMat, + absMat, + absMat, + absMat, + absMat, + absMat, + absMat + }; - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src.reshape(1), dst.reshape(1), stream); } -////////////////////////////////////////////////////////////////////////// -// sqrMat +////////////////////////////////////////////////////////////////////////////// +/// sqr -namespace arithm +namespace { - template struct Sqr : unary_function + template struct SqrOp : unary_function { __device__ __forceinline__ T operator ()(T x) const { return saturate_cast(x * x); } - - __host__ __device__ __forceinline__ Sqr() {} - __host__ __device__ __forceinline__ Sqr(const Sqr&) {} - }; -} - -namespace cv { namespace cuda { namespace device -{ - template struct TransformFunctorTraits< arithm::Sqr > : arithm::ArithmFuncTraits - { }; -}}} -namespace arithm -{ template - void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) + void sqrMat(const GpuMat& src, const GpuMat& dst, Stream& stream) { - device::transform((PtrStepSz) src, (PtrStepSz) dst, Sqr(), WithOutMask(), stream); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), SqrOp(), stream); } - - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -////////////////////////////////////////////////////////////////////////// -// sqrtMat - -namespace cv { namespace cuda { namespace device +void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream) { - template struct TransformFunctorTraits< sqrt_func > : arithm::ArithmFuncTraits + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = { + sqrMat, + sqrMat, + sqrMat, + sqrMat, + sqrMat, + sqrMat, + sqrMat }; -}}} -namespace arithm + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src.reshape(1), dst.reshape(1), stream); +} + +////////////////////////////////////////////////////////////////////////////// +/// sqrt + +namespace { template - void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) + void sqrtMat(const GpuMat& src, const GpuMat& dst, Stream& stream) { - device::transform((PtrStepSz) src, (PtrStepSz) dst, sqrt_func(), WithOutMask(), stream); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), sqrt_func(), stream); } - - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -////////////////////////////////////////////////////////////////////////// -// logMat - -namespace cv { namespace cuda { namespace device +void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream) { - template struct TransformFunctorTraits< log_func > : arithm::ArithmFuncTraits + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = { + sqrtMat, + sqrtMat, + sqrtMat, + sqrtMat, + sqrtMat, + sqrtMat, + sqrtMat }; -}}} -namespace arithm -{ - template - void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - device::transform((PtrStepSz) src, (PtrStepSz) dst, log_func(), WithOutMask(), stream); - } + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_64F ); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src.reshape(1), dst.reshape(1), stream); } -////////////////////////////////////////////////////////////////////////// -// expMat +//////////////////////////////////////////////////////////////////////// +/// exp -namespace arithm +namespace { - template struct Exp : unary_function + template struct ExpOp : unary_function { __device__ __forceinline__ T operator ()(T x) const { exp_func f; return saturate_cast(f(x)); } - - __host__ __device__ __forceinline__ Exp() {} - __host__ __device__ __forceinline__ Exp(const Exp&) {} }; + + template + void expMat(const GpuMat& src, const GpuMat& dst, Stream& stream) + { + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), ExpOp(), stream); + } } -namespace cv { namespace cuda { namespace device +void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream) { - template struct TransformFunctorTraits< arithm::Exp > : arithm::ArithmFuncTraits + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = { + expMat, + expMat, + expMat, + expMat, + expMat, + expMat, + expMat }; -}}} -namespace arithm + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src.reshape(1), dst.reshape(1), stream); +} + +//////////////////////////////////////////////////////////////////////// +// log + +namespace { template - void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) + void logMat(const GpuMat& src, const GpuMat& dst, Stream& stream) { - device::transform((PtrStepSz) src, (PtrStepSz) dst, Exp(), WithOutMask(), stream); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), log_func(), stream); } +} + +void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = + { + logMat, + logMat, + logMat, + logMat, + logMat, + logMat, + logMat + }; + + GpuMat src = _src.getGpuMat(); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src.reshape(1), dst.reshape(1), stream); } -////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// // pow -namespace arithm +namespace { template::is_signed> struct PowOp : unary_function { float power; - __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} - __device__ __forceinline__ T operator()(T e) const { return saturate_cast(__powf((float)e, power)); @@ -239,8 +291,6 @@ namespace arithm { float power; - __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} - __device__ __forceinline__ T operator()(T e) const { T res = saturate_cast(__powf((float)e, power)); @@ -255,8 +305,6 @@ namespace arithm { float power; - __host__ explicit PowOp(double power_) : power(static_cast(power_)) {} - __device__ __forceinline__ float operator()(float e) const { return __powf(::fabs(e), power); @@ -266,37 +314,46 @@ namespace arithm { double power; - __host__ explicit PowOp(double power_) : power(power_) {} - __device__ __forceinline__ double operator()(double e) const { return ::pow(::fabs(e), power); } }; + + template + void powMat(const GpuMat& src, double power, const GpuMat& dst, Stream& stream) + { + PowOp op; + op.power = static_cast::type>(power); + + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); + } } -namespace cv { namespace cuda { namespace device +void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stream) { - template struct TransformFunctorTraits< arithm::PowOp > : arithm::ArithmFuncTraits + typedef void (*func_t)(const GpuMat& src, double power, const GpuMat& dst, Stream& stream); + static const func_t funcs[] = { + powMat, + powMat, + powMat, + powMat, + powMat, + powMat, + powMat }; -}}} -namespace arithm -{ - template - void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream) - { - device::transform((PtrStepSz) src, (PtrStepSz) dst, PowOp(power), WithOutMask(), stream); - } + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_DbgAssert(depth <= CV_64F); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); + funcs[depth](src.reshape(1), power, dst.reshape(1), stream); } -#endif // CUDA_DISABLER +#endif diff --git a/modules/cudaarithm/src/element_operations.cpp b/modules/cudaarithm/src/element_operations.cpp index e670061..c3bbced 100644 --- a/modules/cudaarithm/src/element_operations.cpp +++ b/modules/cudaarithm/src/element_operations.cpp @@ -452,270 +452,6 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream } ////////////////////////////////////////////////////////////////////////////// -// abs - -namespace arithm -{ - template - void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - absMat, - absMat, - absMat, - absMat, - absMat, - absMat, - absMat - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -////////////////////////////////////////////////////////////////////////////// -// sqr - -namespace arithm -{ - template - void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - sqrMat, - sqrMat, - sqrMat, - sqrMat, - sqrMat, - sqrMat, - sqrMat - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -////////////////////////////////////////////////////////////////////////////// -// sqrt - -namespace arithm -{ - template - void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - sqrtMat, - sqrtMat, - sqrtMat, - sqrtMat, - sqrtMat, - sqrtMat, - sqrtMat - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -//////////////////////////////////////////////////////////////////////// -// exp - -namespace arithm -{ - template - void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - expMat, - expMat, - expMat, - expMat, - expMat, - expMat, - expMat - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -//////////////////////////////////////////////////////////////////////// -// log - -namespace arithm -{ - template - void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - logMat, - logMat, - logMat, - logMat, - logMat, - logMat, - logMat - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -//////////////////////////////////////////////////////////////////////// -// pow - -namespace arithm -{ - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stream) -{ - typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow - }; - - GpuMat src = _src.getGpuMat(); - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert(depth <= CV_64F); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); - PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); -} - -////////////////////////////////////////////////////////////////////////////// // compare namespace arithm -- 2.7.4