From 3a1beb1c0155cdcbda479effc71d4e153c778a9d Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Thu, 21 Jul 2011 08:47:44 +0000 Subject: [PATCH] added cv::gpu::pow, ticket #1227 --- modules/gpu/include/opencv2/gpu/gpu.hpp | 6 ++ modules/gpu/src/cuda/element_operations.cu | 60 ++++++++++++++++++++ modules/gpu/src/element_operations.cpp | 34 ++++++++++++ modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp | 14 +++++ modules/gpu/src/stereocsbp.cpp | 2 +- modules/gpu/test/test_arithm.cpp | 68 +++++++++++++++++++++++ modules/gpu/test/test_main.cpp | 6 +- 7 files changed, 188 insertions(+), 2 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 4a4df8a..393611e 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -532,6 +532,12 @@ namespace cv //! computes exponent of each matrix element (b = e**a) //! supports only CV_32FC1 type CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); + + //! computes power of each matrix element: + // (dst(i,j) = pow( src(i,j) , power), if src.type() is integer + // (dst(i,j) = pow(fabs(src(i,j)), power), otherwise + //! supports all, except depth == CV_64F + CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()); //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) //! supports only CV_32FC1 type diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 28d4eab..f467361 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -42,6 +42,7 @@ #include "opencv2/gpu/device/vecmath.hpp" #include "opencv2/gpu/device/transform.hpp" +#include "opencv2/gpu/device/limits_gpu.hpp" #include "opencv2/gpu/device/saturate_cast.hpp" #include "internal_shared.hpp" @@ -669,4 +670,63 @@ namespace cv { namespace gpu { namespace mathfunc } template void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream); + + + ////////////////////////////////////////////////////////////////////////// + // pow + + template::is_signed> + struct PowOp + { + float power; + PowOp(float power_) : power(power_) {} + + template + __device__ __forceinline__ T operator()(const T& e) const + { + return saturate_cast(__powf((float)e, power)); + } + }; + + template + struct PowOp + { + float power; + PowOp(float power_) : power(power_) {} + + __device__ __forceinline__ float operator()(const T& e) + { + T res = saturate_cast(__powf((float)e, power)); + + if ( (e < 0) && (1 & (int)power) ) + res *= -1; + return res; + } + }; + + template<> + struct PowOp + { + float power; + PowOp(float power_) : power(power_) {} + + __device__ __forceinline__ float operator()(const float& e) + { + return __powf(fabs(e), power); + } + }; + + template + void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream) + { + transform((DevMem2D_)src, (DevMem2D_)dst, PowOp(power), stream); + } + + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); }}} diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index c392df7..4feb819 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -68,6 +68,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu( void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_nogpu(); return 0.0;} +void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } + #else //////////////////////////////////////////////////////////////////////// @@ -768,4 +770,36 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double return thresh; } +//////////////////////////////////////////////////////////////////////// +// pow + +namespace cv +{ + namespace gpu + { + namespace mathfunc + { + template + void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + } + } +} + +void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) +{ + CV_Assert( src.depth() != CV_64F ); + dst.create(src.size(), src.type()); + + typedef void (*caller_t)(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + + static const caller_t callers[] = + { + mathfunc::pow_caller, mathfunc::pow_caller, + mathfunc::pow_caller, mathfunc::pow_caller, + mathfunc::pow_caller, mathfunc::pow_caller + }; + + callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream)); +} + #endif diff --git a/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp index 28231dc..4baa2f9 100644 --- a/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp +++ b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp @@ -87,6 +87,20 @@ namespace cv { namespace gpu { namespace device static const bool is_signed = (char)-1 == -1; }; + template<> struct numeric_limits_gpu + { + typedef char type; + __device__ __forceinline__ static type min() { return CHAR_MIN; }; + __device__ __forceinline__ static type max() { return CHAR_MAX; }; + __device__ __forceinline__ static type epsilon(); + __device__ __forceinline__ static type round_error(); + __device__ __forceinline__ static type denorm_min(); + __device__ __forceinline__ static type infinity(); + __device__ __forceinline__ static type quiet_NaN(); + __device__ __forceinline__ static type signaling_NaN(); + static const bool is_signed = (signed char)-1 == -1; + }; + template<> struct numeric_limits_gpu { typedef unsigned char type; diff --git a/modules/gpu/src/stereocsbp.cpp b/modules/gpu/src/stereocsbp.cpp index 25beb8b..cf02993 100644 --- a/modules/gpu/src/stereocsbp.cpp +++ b/modules/gpu/src/stereocsbp.cpp @@ -107,7 +107,7 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int he levels = (int)::log(static_cast(mm)) * 2 / 3; if (levels == 0) levels++; - nr_plane = (int) ((float) ndisp / pow(2.0, levels + 1)); + nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1)); } cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_, diff --git a/modules/gpu/test/test_arithm.cpp b/modules/gpu/test/test_arithm.cpp index 7905dec..0c7494f 100644 --- a/modules/gpu/test/test_arithm.cpp +++ b/modules/gpu/test/test_arithm.cpp @@ -752,6 +752,74 @@ TEST_P(Exp, Accuracy) INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::ValuesIn(devices())); + + +//////////////////////////////////////////////////////////////////////////////// +// pow + +struct Pow : testing::TestWithParam< std::tr1::tuple > +{ + cv::gpu::DeviceInfo devInfo; + int type; + + double power; + cv::Size size; + cv::Mat mat; + + cv::Mat dst_gold; + + virtual void SetUp() + { + devInfo = std::tr1::get<0>(GetParam()); + type = std::tr1::get<1>(GetParam()); + + cv::gpu::setDevice(devInfo.deviceID()); + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); + //size = cv::Size(2, 2); + + mat = cvtest::randomMat(rng, size, type, 0.0, 100.0, false); + + if (mat.depth() == CV_32F) + power = rng.uniform(1.2f, 3.f); + else + { + int ipower = rng.uniform(2, 8); + power = (float)ipower; + } + cv::pow(mat, power, dst_gold); + } +}; + +TEST_P(Pow, Accuracy) +{ + PRINT_PARAM(devInfo); + PRINT_TYPE(type); + PRINT_PARAM(size); + PRINT_PARAM(power); + + cv::Mat dst; + + ASSERT_NO_THROW( + cv::gpu::GpuMat gpu_res; + + cv::gpu::pow(cv::gpu::GpuMat(mat), power, gpu_res); + + gpu_res.download(dst); + ); + + /*std::cout << mat << std::endl << std::endl; + std::cout << dst << std::endl << std::endl; + std::cout << dst_gold << std::endl;*/ + EXPECT_MAT_NEAR(dst_gold, dst, 1); +} + +INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine( + testing::ValuesIn(devices()), + testing::Values(CV_32F, CV_32FC3))); + //////////////////////////////////////////////////////////////////////////////// // log diff --git a/modules/gpu/test/test_main.cpp b/modules/gpu/test/test_main.cpp index 57a26af..f03fc5a 100644 --- a/modules/gpu/test/test_main.cpp +++ b/modules/gpu/test/test_main.cpp @@ -68,9 +68,13 @@ void print_info() #endif int deviceCount = cv::gpu::getCudaEnabledDeviceCount(); + int driver; + cudaDriverGetVersion(&driver); - printf("CUDA version: %d\n", CUDART_VERSION); + printf("CUDA Driver version: %d\n", driver); + printf("CUDA Runtime version: %d\n", CUDART_VERSION); printf("CUDA device count: %d\n\n", deviceCount); + for (int i = 0; i < deviceCount; ++i) { -- 2.7.4