From 3a1beb1c0155cdcbda479effc71d4e153c778a9d Mon Sep 17 00:00:00 2001
From: Anatoly Baksheev <no@email>
Date: Thu, 21 Jul 2011 08:47:44 +0000
Subject: [PATCH] added cv::gpu::pow, ticket #1227

---
 modules/gpu/include/opencv2/gpu/gpu.hpp           |  6 ++
 modules/gpu/src/cuda/element_operations.cu        | 60 ++++++++++++++++++++
 modules/gpu/src/element_operations.cpp            | 34 ++++++++++++
 modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp | 14 +++++
 modules/gpu/src/stereocsbp.cpp                    |  2 +-
 modules/gpu/test/test_arithm.cpp                  | 68 +++++++++++++++++++++++
 modules/gpu/test/test_main.cpp                    |  6 +-
 7 files changed, 188 insertions(+), 2 deletions(-)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 4a4df8a..393611e 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -532,6 +532,12 @@ namespace cv
         //! computes exponent of each matrix element (b = e**a)
         //! supports only CV_32FC1 type
         CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null());
+        
+        //! computes power of each matrix element:
+        //    (dst(i,j) = pow(     src(i,j) , power), if src.type() is integer
+        //    (dst(i,j) = pow(fabs(src(i,j)), power), otherwise
+        //! supports all, except depth == CV_64F
+        CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null());
 
         //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
         //! supports only CV_32FC1 type
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index 28d4eab..f467361 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -42,6 +42,7 @@
 
 #include "opencv2/gpu/device/vecmath.hpp"
 #include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/limits_gpu.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "internal_shared.hpp"
 
@@ -669,4 +670,63 @@ namespace cv { namespace gpu { namespace mathfunc
     }
 
     template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // pow
+    
+    template<typename T, bool Signed = device::numeric_limits_gpu<T>::is_signed>
+    struct PowOp
+    {    
+        float power;
+        PowOp(float power_) : power(power_) {}
+    
+        template<typename T>
+        __device__ __forceinline__ T operator()(const T& e) const
+        {      
+            return saturate_cast<T>(__powf((float)e, power));
+        }      
+    };
+
+    template<typename T>
+    struct PowOp<T, true>
+    {
+        float power;
+        PowOp(float power_) : power(power_) {}
+
+        __device__ __forceinline__ float operator()(const T& e)
+        {
+              T res = saturate_cast<T>(__powf((float)e, power));            
+            
+            if ( (e < 0) && (1 & (int)power) )
+                    res *= -1;            
+            return res;         
+        }
+    };
+
+    template<>
+    struct PowOp<float>
+    {
+        float power;
+        PowOp(float power_) : power(power_) {}
+
+        __device__ __forceinline__ float operator()(const float& e)
+        {
+            return __powf(fabs(e), power);
+        }
+    };
+
+    template<typename T>
+    void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
+    {
+        transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
+    }   
+
+    template void pow_caller<uchar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<schar>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<uint>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+    template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
 }}}
diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp
index c392df7..4feb819 100644
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -68,6 +68,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(
 void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
 double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_nogpu(); return 0.0;}
 
+void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&)  { throw_nogpu(); }
+
 #else
 
 ////////////////////////////////////////////////////////////////////////
@@ -768,4 +770,36 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     return thresh;
 }
 
+////////////////////////////////////////////////////////////////////////
+// pow
+
+namespace cv
+{
+    namespace gpu
+    {
+        namespace mathfunc
+        {
+            template<typename T>
+            void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+        }
+    }
+}
+
+void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
+{    
+    CV_Assert( src.depth() != CV_64F );
+    dst.create(src.size(), src.type());
+
+    typedef void (*caller_t)(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
+
+    static const caller_t callers[] = 
+    {
+        mathfunc::pow_caller<unsigned char>,  mathfunc::pow_caller<signed char>, 
+        mathfunc::pow_caller<unsigned short>, mathfunc::pow_caller<short>, 
+        mathfunc::pow_caller<int>, mathfunc::pow_caller<float>
+    };
+
+    callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream));    
+}
+
 #endif
diff --git a/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
index 28231dc..4baa2f9 100644
--- a/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
@@ -87,6 +87,20 @@ namespace cv { namespace gpu { namespace device
         static const bool is_signed = (char)-1 == -1;
     };
 
+     template<> struct numeric_limits_gpu<signed char>
+    {
+        typedef char type;
+        __device__ __forceinline__ static type min() { return CHAR_MIN; };
+        __device__ __forceinline__ static type max() { return CHAR_MAX; };
+        __device__ __forceinline__ static type epsilon();
+        __device__ __forceinline__ static type round_error();
+        __device__ __forceinline__ static type denorm_min();
+        __device__ __forceinline__ static type infinity();
+        __device__ __forceinline__ static type quiet_NaN();
+        __device__ __forceinline__ static type signaling_NaN();
+        static const bool is_signed = (signed char)-1 == -1;
+    };
+
     template<> struct numeric_limits_gpu<unsigned char>
     {
         typedef unsigned char type;
diff --git a/modules/gpu/src/stereocsbp.cpp b/modules/gpu/src/stereocsbp.cpp
index 25beb8b..cf02993 100644
--- a/modules/gpu/src/stereocsbp.cpp
+++ b/modules/gpu/src/stereocsbp.cpp
@@ -107,7 +107,7 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int he
     levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
     if (levels == 0) levels++;
 
-    nr_plane = (int) ((float) ndisp / pow(2.0, levels + 1));
+    nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
 }
 
 cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
diff --git a/modules/gpu/test/test_arithm.cpp b/modules/gpu/test/test_arithm.cpp
index 7905dec..0c7494f 100644
--- a/modules/gpu/test/test_arithm.cpp
+++ b/modules/gpu/test/test_arithm.cpp
@@ -752,6 +752,74 @@ TEST_P(Exp, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::ValuesIn(devices()));
 
+
+
+////////////////////////////////////////////////////////////////////////////////
+// pow
+
+struct Pow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    double power;
+    cv::Size size;
+    cv::Mat mat;
+
+    cv::Mat dst_gold;
+
+    virtual void SetUp() 
+    {        
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());        
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+        //size = cv::Size(2, 2);
+
+        mat = cvtest::randomMat(rng, size, type, 0.0, 100.0, false);        
+
+        if (mat.depth() == CV_32F)
+            power = rng.uniform(1.2f, 3.f);
+        else
+        {
+            int ipower = rng.uniform(2, 8);
+            power = (float)ipower;
+        }
+        cv::pow(mat, power, dst_gold);
+    }
+};
+
+TEST_P(Pow, Accuracy) 
+{
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(power);
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
+
+        cv::gpu::pow(cv::gpu::GpuMat(mat), power, gpu_res);
+
+        gpu_res.download(dst);
+    );
+
+    /*std::cout  << mat << std::endl << std::endl;
+    std::cout  << dst << std::endl << std::endl;
+    std::cout  << dst_gold << std::endl;*/
+    EXPECT_MAT_NEAR(dst_gold, dst, 1);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32F, CV_32FC3)));
+
 ////////////////////////////////////////////////////////////////////////////////
 // log
 
diff --git a/modules/gpu/test/test_main.cpp b/modules/gpu/test/test_main.cpp
index 57a26af..f03fc5a 100644
--- a/modules/gpu/test/test_main.cpp
+++ b/modules/gpu/test/test_main.cpp
@@ -68,9 +68,13 @@ void print_info()
 #endif
 
     int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+    int driver;
+    cudaDriverGetVersion(&driver);
 
-    printf("CUDA version: %d\n", CUDART_VERSION);    
+    printf("CUDA Driver  version: %d\n", driver);        
+    printf("CUDA Runtime version: %d\n", CUDART_VERSION);    
     printf("CUDA device count: %d\n\n", deviceCount);
+    
 
     for (int i = 0; i < deviceCount; ++i)
     {
-- 
2.7.4