From 6cafec8861bf06e72a874484d80207e3ebd01748 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 20 Aug 2010 06:47:11 +0000 Subject: [PATCH] added colorizeDisp, fixed DisparityBilateralFilter --- modules/gpu/include/opencv2/gpu/gpu.hpp | 7 +- modules/gpu/src/bilateral_filter.cpp | 62 ++++++++------- modules/gpu/src/cuda/bilateral_filter.cu | 28 ------- modules/gpu/src/cuda/imgproc.cu | 127 +++++++++++++++++++++++++++++++ modules/gpu/src/imgproc_gpu.cpp | 30 +++++++- 5 files changed, 195 insertions(+), 59 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index e5c475f..a9cd08c 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -349,6 +349,8 @@ namespace cv // Does mean shift filtering on GPU. CV_EXPORTS void meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + CV_EXPORTS void colorizeDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp); + //////////////////////////////// StereoBM_GPU //////////////////////////////// class CV_EXPORTS StereoBM_GPU @@ -533,6 +535,7 @@ namespace cv //! Acync version void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream); + private: int ndisp; int radius; int iters; @@ -540,8 +543,8 @@ namespace cv float edge_threshold; float max_disc_threshold; float sigma_range; - private: - std::vector table_color; + + GpuMat table_color; GpuMat table_space; }; } diff --git a/modules/gpu/src/bilateral_filter.cpp b/modules/gpu/src/bilateral_filter.cpp index cc745db..ddc8745 100644 --- a/modules/gpu/src/bilateral_filter.cpp +++ b/modules/gpu/src/bilateral_filter.cpp @@ -58,7 +58,6 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, namespace cv { namespace gpu { namespace bf { - void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream); void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc); void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream); @@ -71,50 +70,53 @@ namespace const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f; const float DEFAULT_SIGMA_RANGE = 10.0f; - inline void calc_color_weighted_table(vector& table_color, float sigma_range, int len) + inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len) { - float* color_table_x; - - table_color.resize(len); - color_table_x = &table_color[0]; + Mat cpu_table_color(1, len, CV_32F); - for(int y = 0; y < len; y++) - table_color[y] = static_cast(std::exp(-double(y * y) / (2 * sigma_range * sigma_range))); + float* line = cpu_table_color.ptr(); + + for(int i = 0; i < len; i++) + line[i] = static_cast(std::exp(-double(i * i) / (2 * sigma_range * sigma_range))); + + table_color.upload(cpu_table_color); } - inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space, cudaStream_t stream) + inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space) { int half = (win_size >> 1); - table_space.create(half + 1, half + 1, CV_32F); - bf::calc_space_weighted_filter_gpu(table_space, half, dist_space, stream); + Mat cpu_table_space(half + 1, half + 1, CV_32F); + + for (int y = 0; y <= half; ++y) + { + float* row = cpu_table_space.ptr(y); + for (int x = 0; x <= half; ++x) + row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space); + } + + table_space.upload(cpu_table_space); } template - void bilateral_filter_operator(DisparityBilateralFilter& rthis, vector& table_color, GpuMat& table_space, + void bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold, + GpuMat& table_color, GpuMat& table_space, const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream) { - calc_color_weighted_table(table_color, rthis.sigma_range, 255); - calc_space_weighted_filter(table_space, rthis.radius * 2 + 1, rthis.radius + 1.0f, stream); - - short edge_disc = max(short(1), short(rthis.ndisp * rthis.edge_threshold + 0.5)); - short max_disc = short(rthis.ndisp * rthis.max_disc_threshold + 0.5); + short edge_disc = max(short(1), short(ndisp * edge_threshold + 0.5)); + short max_disc = short(ndisp * max_disc_threshold + 0.5); - float* table_color_dev; - cudaSafeCall( cudaMalloc((void**)&table_color_dev, table_color.size() * sizeof(float)) ); - cudaSafeCall( cudaMemcpy(table_color_dev, &table_color[0], table_color.size() * sizeof(float), cudaMemcpyHostToDevice) ); - bf::load_constants(table_color_dev, table_space, rthis.ndisp, rthis.radius, edge_disc, max_disc); + bf::load_constants(table_color.ptr(), table_space, ndisp, radius, edge_disc, max_disc); if (&dst != &disp) disp.copyTo(dst); - bf::bilateral_filter_gpu((DevMem2D_)dst, img, img.channels(), rthis.iters, stream); - - cudaSafeCall( cudaFree(table_color_dev) ); + bf::bilateral_filter_gpu((DevMem2D_)dst, img, img.channels(), iters, stream); } - typedef void (*bilateral_filter_operator_t)(DisparityBilateralFilter& rthis, vector& table_color, GpuMat& table_space, - const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream); + typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, + GpuMat& table_color, GpuMat& table_space, + const GpuMat& disp, const GpuMat& img, GpuMat& dst, cudaStream_t stream); const bilateral_filter_operator_t operators[] = {bilateral_filter_operator, 0, 0, bilateral_filter_operator, 0, 0, 0, 0}; @@ -124,6 +126,8 @@ cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radi : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD), sigma_range(DEFAULT_SIGMA_RANGE) { + calc_color_weighted_table(table_color, sigma_range, 255); + calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); } cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_, @@ -131,20 +135,22 @@ cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radi : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_), sigma_range(sigma_range_) { + calc_color_weighted_table(table_color, sigma_range, 255); + calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); } void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst) { CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters); CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3)); - operators[disp.type()](*this, table_color, table_space, disp, img, dst, 0); + operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, 0); } void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream) { CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters); CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3)); - operators[disp.type()](*this, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream)); + operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, StreamAccessor::getStream(stream)); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu index f53d4c2..9eca3f4 100644 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ b/modules/gpu/src/cuda/bilateral_filter.cu @@ -53,34 +53,6 @@ using namespace cv::gpu::impl; namespace bf_krnls { - __global__ void calc_space_weighted_filter(float* table_space, size_t step, int half, float dist_space) - { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y <= half && x <= half) - *(table_space + y * step + x) = expf(-sqrtf(float(y * y) + float(x * x)) / dist_space); - } -} - -namespace cv { namespace gpu { namespace bf -{ - void calc_space_weighted_filter_gpu(const DevMem2Df& table_space, int half, float dist_space, cudaStream_t stream) - { - dim3 threads(32, 8, 1); - dim3 grid(1, 1, 1); - grid.x = divUp(half + 1, threads.x); - grid.x = divUp(half + 1, threads.y); - - bf_krnls::calc_space_weighted_filter<<>>(table_space.ptr, table_space.step/sizeof(float), half, dist_space); - - if (stream != 0) - cudaSafeCall( cudaThreadSynchronize() ); - } -}}} - -namespace bf_krnls -{ __constant__ float* ctable_color; __constant__ float* ctable_space; __constant__ size_t ctable_space_step; diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 3ce7f32..a19f08d 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -182,4 +182,131 @@ namespace cv { namespace gpu { namespace impl } }}} +/////////////////////////////////// colorizeDisp /////////////////////////////////////////////// +namespace imgproc +{ + template + __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1) + { + unsigned int H = ((ndisp-d) * 240)/ndisp; + + unsigned int hi = (H/60) % 6; + float f = H/60.f - H/60; + float p = V * (1 - S); + float q = V * (1 - f * S); + float t = V * (1 - (1 - f) * S); + + float3 res; + + if (hi == 0) //R = V, G = t, B = p + { + res.x = p; + res.y = t; + res.z = V; + } + + if (hi == 1) // R = q, G = V, B = p + { + res.x = p; + res.y = V; + res.z = q; + } + + if (hi == 2) // R = p, G = V, B = t + { + res.x = t; + res.y = V; + res.z = p; + } + + if (hi == 3) // R = p, G = q, B = V + { + res.x = V; + res.y = q; + res.z = p; + } + + if (hi == 4) // R = t, G = p, B = V + { + res.x = V; + res.y = p; + res.z = t; + } + + if (hi == 5) // R = V, G = p, B = q + { + res.x = q; + res.y = p; + res.z = V; + } + unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f); + unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f); + unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f); + + return (r << 16) + (g << 8) + b; + } + + __global__ void colorizeDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) + { + const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if(x < width && y < height) + { + uchar4 d4 = *(uchar4*)(disp + y * disp_step + x); + + uint4 res; + res.x = cvtPixel(d4.x, ndisp); + res.y = cvtPixel(d4.y, ndisp); + res.z = cvtPixel(d4.z, ndisp); + res.w = cvtPixel(d4.w, ndisp); + + uint4* line = (uint4*)(out_image + y * out_step); + line[x >> 2] = res; + } + } + + __global__ void colorizeDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) + { + const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if(x < width && y < height) + { + short2 d2 = *(short2*)(disp + y * disp_step + x); + + uint2 res; + res.x = cvtPixel(d2.x, ndisp); + res.y = cvtPixel(d2.y, ndisp); + + uint2* line = (uint2*)(out_image + y * out_step); + line[x >> 1] = res; + } + } +} + +namespace cv { namespace gpu { namespace impl +{ + void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp) + { + dim3 threads(16, 16, 1); + dim3 grid(1, 1, 1); + grid.x = divUp(src.cols, threads.x << 2); + grid.y = divUp(src.rows, threads.y); + + imgproc::colorizeDisp<<>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp); + cudaThreadSynchronize(); + } + + void colorizeDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp) + { + dim3 threads(32, 8, 1); + dim3 grid(1, 1, 1); + grid.x = divUp(src.cols, threads.x << 1); + grid.y = divUp(src.rows, threads.y); + + imgproc::colorizeDisp<<>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp); + cudaThreadSynchronize(); + } +}}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 861b03a..0bde5b5 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -47,8 +47,9 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) -void cv::gpu::remap(const GpuMat& /*src*/, const GpuMat& /*xmap*/, const GpuMat& /*ymap*/, GpuMat& /*dst*/) { throw_nogpu(); } +void cv::gpu::remap(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::meanShiftFiltering_GPU(const GpuMat&, GpuMat&, int, int, TermCriteria ) { throw_nogpu(); } +void cv::gpu::colorizeDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -59,6 +60,9 @@ namespace cv { namespace gpu extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_& xmap, const DevMem2D_& ymap, DevMem2D dst); extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps); + + void colorizeDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp); + void colorizeDisp_gpu(const DevMem2D_& src, const DevMem2D& dst, int ndisp); } }} @@ -98,5 +102,29 @@ void cv::gpu::meanShiftFiltering_GPU(const GpuMat& src, GpuMat& dst, int sp, int impl::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); } +namespace +{ + template + void colorizeDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp) + { + impl::colorizeDisp_gpu((DevMem2D_)src, dst, ndisp); + } +} + +void cv::gpu::colorizeDisp(const GpuMat& src, GpuMat& dst, int ndisp) +{ + typedef void (*colorizeDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp); + + static const colorizeDisp_caller_t callers[] = {colorizeDisp_caller, 0, 0, colorizeDisp_caller, 0, 0, 0, 0}; + CV_Assert(src.type() == CV_8U || src.type() == CV_16S); + + GpuMat out; + if (&dst != &src) + out = dst; + out.create(src.size(), CV_8UC4); + + callers[src.type()](src, out, ndisp); + dst = out; +} #endif /* !defined (HAVE_CUDA) */ \ No newline at end of file -- 2.7.4