From 6cafec8861bf06e72a874484d80207e3ebd01748 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Fri, 20 Aug 2010 06:47:11 +0000
Subject: [PATCH] added colorizeDisp, fixed DisparityBilateralFilter

---
 modules/gpu/include/opencv2/gpu/gpu.hpp  |   7 +-
 modules/gpu/src/bilateral_filter.cpp     |  62 ++++++++-------
 modules/gpu/src/cuda/bilateral_filter.cu |  28 -------
 modules/gpu/src/cuda/imgproc.cu          | 127 +++++++++++++++++++++++++++++++
 modules/gpu/src/imgproc_gpu.cpp          |  30 +++++++-
 5 files changed, 195 insertions(+), 59 deletions(-)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index e5c475f..a9cd08c 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -349,6 +349,8 @@ namespace cv
         // Does mean shift filtering on GPU.
         CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
 
+        CV_EXPORTS void colorizeDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp);
+
         //////////////////////////////// StereoBM_GPU ////////////////////////////////
 
         class CV_EXPORTS StereoBM_GPU
@@ -533,6 +535,7 @@ namespace cv
             //! Acync version
             void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream);
 
+        private:
             int ndisp;
             int radius;
             int iters;
@@ -540,8 +543,8 @@ namespace cv
             float edge_threshold;
             float max_disc_threshold;
             float sigma_range;
-        private:
-            std::vector<float> table_color;
+
+            GpuMat table_color;
             GpuMat table_space;
         };
     }
diff --git a/modules/gpu/src/bilateral_filter.cpp b/modules/gpu/src/bilateral_filter.cpp
index cc745db..ddc8745 100644
--- a/modules/gpu/src/bilateral_filter.cpp
+++ b/modules/gpu/src/bilateral_filter.cpp
@@ -58,7 +58,6 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
 
 namespace cv { namespace gpu { namespace bf 
 {
-    void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream);
     void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc);
 
     void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream);
@@ -71,50 +70,53 @@ namespace
     const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;
     const float DEFAULT_SIGMA_RANGE = 10.0f;
 
-    inline void calc_color_weighted_table(vector<float>& table_color, float sigma_range, int len)
+    inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)
     {
-        float* color_table_x;
-        
-        table_color.resize(len);
-        color_table_x = &table_color[0];
+        Mat cpu_table_color(1, len, CV_32F);
 
-        for(int y = 0; y < len; y++) 
-            table_color[y] = static_cast<float>(std::exp(-double(y * y) / (2 * sigma_range * sigma_range)));
+        float* line = cpu_table_color.ptr<float>();
+
+        for(int i = 0; i < len; i++) 
+            line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));
+
+        table_color.upload(cpu_table_color);
     }
 
-    inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space, cudaStream_t stream)
+    inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)
     {
         int half = (win_size >> 1);
-        table_space.create(half + 1, half + 1, CV_32F);
 
-        bf::calc_space_weighted_filter_gpu(table_space, half, dist_space, stream);
+        Mat cpu_table_space(half + 1, half + 1, CV_32F);
+
+        for (int y = 0; y <= half; ++y)
+        {
+            float* row = cpu_table_space.ptr<float>(y);
+            for (int x = 0; x <= half; ++x)
+                row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);
+        }
+
+        table_space.upload(cpu_table_space);
     }
 
     template <typename T>
-    void bilateral_filter_operator(DisparityBilateralFilter& rthis, vector<float>& table_color, GpuMat& table_space, 
+    void bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold, 
+                                   GpuMat& table_color, GpuMat& table_space, 
                                    const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream)
     {
-        calc_color_weighted_table(table_color, rthis.sigma_range, 255);
-        calc_space_weighted_filter(table_space, rthis.radius * 2 + 1, rthis.radius + 1.0f, stream);
-
-        short edge_disc = max<short>(short(1), short(rthis.ndisp * rthis.edge_threshold + 0.5));
-        short max_disc = short(rthis.ndisp * rthis.max_disc_threshold + 0.5);
+        short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
+        short max_disc = short(ndisp * max_disc_threshold + 0.5);
 
-        float* table_color_dev;
-        cudaSafeCall( cudaMalloc((void**)&table_color_dev, table_color.size() * sizeof(float)) );
-        cudaSafeCall( cudaMemcpy(table_color_dev, &table_color[0], table_color.size() * sizeof(float), cudaMemcpyHostToDevice) );
-        bf::load_constants(table_color_dev, table_space, rthis.ndisp, rthis.radius, edge_disc, max_disc);
+        bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
 
         if (&dst != &disp)
             disp.copyTo(dst);
 
-        bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), rthis.iters, stream);
-
-        cudaSafeCall( cudaFree(table_color_dev) );
+        bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, stream);
     }
 
-    typedef void (*bilateral_filter_operator_t)(DisparityBilateralFilter& rthis, vector<float>& table_color, GpuMat& table_space, 
-                                   const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream);
+    typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, 
+                                                GpuMat& table_color, GpuMat& table_space, 
+                                                const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream);
     
     const bilateral_filter_operator_t operators[] = 
         {bilateral_filter_operator<unsigned char>, 0, 0, bilateral_filter_operator<short>, 0, 0, 0, 0};
@@ -124,6 +126,8 @@ cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radi
     : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),
       sigma_range(DEFAULT_SIGMA_RANGE)
 {
+    calc_color_weighted_table(table_color, sigma_range, 255);
+    calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
 }
 
 cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_, 
@@ -131,20 +135,22 @@ cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radi
     : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_), 
       sigma_range(sigma_range_)
 {
+    calc_color_weighted_table(table_color, sigma_range, 255);
+    calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
 }
 
 void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst)
 {
     CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
     CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
-    operators[disp.type()](*this, table_color, table_space, disp, img, dst, 0);
+    operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, 0);
 }
 
 void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
 {
     CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
     CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
-    operators[disp.type()](*this, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream));
+    operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream));
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu
index f53d4c2..9eca3f4 100644
--- a/modules/gpu/src/cuda/bilateral_filter.cu
+++ b/modules/gpu/src/cuda/bilateral_filter.cu
@@ -53,34 +53,6 @@ using namespace cv::gpu::impl;
 
 namespace bf_krnls
 {
-    __global__ void calc_space_weighted_filter(float* table_space, size_t step, int half, float dist_space)
-    {
-        int x = blockIdx.x * blockDim.x + threadIdx.x;
-        int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-        if (y <= half && x <= half)
-            *(table_space + y * step + x) = expf(-sqrtf(float(y * y) + float(x * x)) / dist_space);
-    }
-}
-
-namespace cv { namespace gpu { namespace bf 
-{
-    void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream)
-    {
-        dim3 threads(32, 8, 1);
-        dim3 grid(1, 1, 1);
-        grid.x = divUp(half + 1, threads.x);
-        grid.x = divUp(half + 1, threads.y);
-
-        bf_krnls::calc_space_weighted_filter<<<grid, threads, 0, stream>>>(table_space.ptr, table_space.step/sizeof(float), half, dist_space);
-
-        if (stream != 0)
-            cudaSafeCall( cudaThreadSynchronize() );
-    }
-}}}
-
-namespace bf_krnls
-{
     __constant__ float* ctable_color;
     __constant__ float* ctable_space;
     __constant__ size_t ctable_space_step;
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index 3ce7f32..a19f08d 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -182,4 +182,131 @@ namespace cv { namespace gpu { namespace impl
     }
 }}}
 
+/////////////////////////////////// colorizeDisp ///////////////////////////////////////////////
 
+namespace imgproc
+{
+    template <typename T>
+    __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
+    {        
+        unsigned int H = ((ndisp-d) * 240)/ndisp;
+
+        unsigned int hi = (H/60) % 6;
+        float f = H/60.f - H/60;
+        float p = V * (1 - S);
+        float q = V * (1 - f * S);
+        float t = V * (1 - (1 - f) * S);
+
+        float3 res;
+        
+        if (hi == 0) //R = V,	G = t,	B = p
+        {
+            res.x = p;
+            res.y = t;
+            res.z = V;
+        }
+
+        if (hi == 1) // R = q,	G = V,	B = p
+        {
+            res.x = p;
+            res.y = V;
+            res.z = q;
+        }        
+        
+        if (hi == 2) // R = p,	G = V,	B = t
+        {
+            res.x = t;
+            res.y = V;
+            res.z = p;
+        }
+            
+        if (hi == 3) // R = p,	G = q,	B = V
+        {
+            res.x = V;
+            res.y = q;
+            res.z = p;
+        }
+
+        if (hi == 4) // R = t,	G = p,	B = V
+        {
+            res.x = V;
+            res.y = p;
+            res.z = t;
+        }
+
+        if (hi == 5) // R = V,	G = p,	B = q
+        {
+            res.x = q;
+            res.y = p;
+            res.z = V;
+        }
+        unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f);
+        unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f);
+        unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f);
+
+        return (r << 16) + (g << 8) + b;    
+    } 
+
+    __global__ void colorizeDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    {
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height) 
+        {
+            uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);
+
+            uint4 res;
+            res.x = cvtPixel(d4.x, ndisp);
+            res.y = cvtPixel(d4.y, ndisp);
+            res.z = cvtPixel(d4.z, ndisp);
+            res.w = cvtPixel(d4.w, ndisp);
+                    
+            uint4* line = (uint4*)(out_image + y * out_step);
+            line[x >> 2] = res;
+        }
+    }
+
+    __global__ void colorizeDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
+    {
+        const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if(x < width && y < height) 
+        {
+            short2 d2 = *(short2*)(disp + y * disp_step + x);
+
+            uint2 res;
+            res.x = cvtPixel(d2.x, ndisp);            
+            res.y = cvtPixel(d2.y, ndisp);
+
+            uint2* line = (uint2*)(out_image + y * out_step);
+            line[x >> 1] = res;
+        }
+    }
+}
+
+namespace cv { namespace gpu { namespace impl 
+{
+    void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp)
+    {
+        dim3 threads(16, 16, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 2);
+        grid.y = divUp(src.rows, threads.y);
+         
+        imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
+        cudaThreadSynchronize(); 
+    }
+
+    void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp)
+    {
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(src.cols, threads.x << 1);
+        grid.y = divUp(src.rows, threads.y);
+         
+        imgproc::colorizeDisp<<<grid, threads>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
+        cudaThreadSynchronize();
+    }
+}}}
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 861b03a..0bde5b5 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -47,8 +47,9 @@ using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA)
 
-void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::remap(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, int, int, TermCriteria ) { throw_nogpu(); }
+void cv::gpu::colorizeDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -59,6 +60,9 @@ namespace cv { namespace gpu
         extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst);
 
         extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);
+
+        void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp);
+        void colorizeDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp);
     }
 }}
 
@@ -98,5 +102,29 @@ void cv::gpu::meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int
     impl::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);    
 }
 
+namespace
+{
+    template <typename T>
+    void colorizeDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp)
+    {
+        impl::colorizeDisp_gpu((DevMem2D_<T>)src, dst, ndisp);
+    }
+}
+
+void cv::gpu::colorizeDisp(const GpuMat& src, GpuMat& dst, int ndisp)
+{
+    typedef void (*colorizeDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp);
+
+    static const colorizeDisp_caller_t callers[] = {colorizeDisp_caller<uchar>, 0, 0, colorizeDisp_caller<short>, 0, 0, 0, 0};
+    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);
+
+    GpuMat out;
+    if (&dst != &src)
+        out = dst;
+    out.create(src.size(), CV_8UC4);
+    
+    callers[src.type()](src, out, ndisp);
+    dst = out;
+}
 
 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
-- 
2.7.4