1 /*M///////////////////////////////////////////////////////////////////////////////////////
\r
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
\r
5 // By downloading, copying, installing or using the software you agree to this license.
\r
6 // If you do not agree to this license, do not download, install,
\r
7 // copy or use the software.
\r
10 // License Agreement
\r
11 // For Open Source Computer Vision Library
\r
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
\r
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
\r
15 // Third party copyrights are property of their respective owners.
\r
17 // Redistribution and use in source and binary forms, with or without modification,
\r
18 // are permitted provided that the following conditions are met:
\r
20 // * Redistribution's of source code must retain the above copyright notice,
\r
21 // this list of conditions and the following disclaimer.
\r
23 // * Redistribution's in binary form must reproduce the above copyright notice,
\r
24 // this list of conditions and the following disclaimer in the documentation
\r
25 // and/or other materials provided with the distribution.
\r
27 // * The name of the copyright holders may not be used to endorse or promote products
\r
28 // derived from this software without specific prior written permission.
\r
30 // This software is provided by the copyright holders and contributors "as is" and
\r
31 // any express or implied warranties, including, but not limited to, the implied
\r
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
\r
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
\r
34 // indirect, incidental, special, exemplary, or consequential damages
\r
35 // (including, but not limited to, procurement of substitute goods or services;
\r
36 // loss of use, data, or profits; or business interruption) however caused
\r
37 // and on any theory of liability, whether in contract, strict liability,
\r
38 // or tort (including negligence or otherwise) arising in any way out of
\r
39 // the use of this software, even if advised of the possibility of such damage.
\r
43 #include "internal_shared.hpp"
\r
44 #include "opencv2/gpu/device/limits.hpp"
\r
46 namespace cv { namespace gpu { namespace device
\r
48 namespace bilateral_filter
\r
50 __constant__ float* ctable_color;
\r
51 __constant__ float* ctable_space;
\r
52 __constant__ size_t ctable_space_step;
\r
54 __constant__ int cndisp;
\r
55 __constant__ int cradius;
\r
57 __constant__ short cedge_disc;
\r
58 __constant__ short cmax_disc;
\r
60 void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
\r
62 cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
\r
63 cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
\r
64 size_t table_space_step = table_space.step / sizeof(float);
\r
65 cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
\r
67 cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
\r
68 cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
\r
70 cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
\r
71 cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
\r
74 template <int channels>
\r
77 static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
\r
79 uchar x = ::abs(a[0] - b[0]);
\r
80 uchar y = ::abs(a[1] - b[1]);
\r
81 uchar z = ::abs(a[2] - b[2]);
\r
82 return (::max(::max(x, y), z));
\r
87 struct DistRgbMax<1>
\r
89 static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
\r
91 return ::abs(a[0] - b[0]);
\r
95 template <int channels, typename T>
\r
96 __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
\r
98 const int y = blockIdx.y * blockDim.y + threadIdx.y;
\r
99 const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
\r
103 if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
\r
105 dp[0] = *(disp + (y ) * disp_step + x + 0);
\r
106 dp[1] = *(disp + (y-1) * disp_step + x + 0);
\r
107 dp[2] = *(disp + (y ) * disp_step + x - 1);
\r
108 dp[3] = *(disp + (y+1) * disp_step + x + 0);
\r
109 dp[4] = *(disp + (y ) * disp_step + x + 1);
\r
111 if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
\r
113 const int ymin = ::max(0, y - cradius);
\r
114 const int xmin = ::max(0, x - cradius);
\r
115 const int ymax = ::min(h - 1, y + cradius);
\r
116 const int xmax = ::min(w - 1, x + cradius);
\r
118 float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
\r
120 const uchar* ic = img + y * img_step + channels * x;
\r
122 for(int yi = ymin; yi <= ymax; yi++)
\r
124 const T* disp_y = disp + yi * disp_step;
\r
126 for(int xi = xmin; xi <= xmax; xi++)
\r
128 const uchar* in = img + yi * img_step + channels * xi;
\r
130 uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
\r
132 const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];
\r
134 const T disp_reg = disp_y[xi];
\r
136 cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;
\r
137 cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;
\r
138 cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;
\r
139 cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;
\r
140 cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;
\r
144 float minimum = numeric_limits<float>::max();
\r
147 if (cost[0] < minimum)
\r
152 if (cost[1] < minimum)
\r
157 if (cost[2] < minimum)
\r
162 if (cost[3] < minimum)
\r
167 if (cost[4] < minimum)
\r
173 *(disp + y * disp_step + x) = dp[id];
\r
178 template <typename T>
\r
179 void bilateral_filter_caller(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
\r
181 dim3 threads(32, 8, 1);
\r
182 dim3 grid(1, 1, 1);
\r
183 grid.x = divUp(disp.cols, threads.x << 1);
\r
184 grid.y = divUp(disp.rows, threads.y);
\r
189 for (int i = 0; i < iters; ++i)
\r
191 bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
\r
192 cudaSafeCall( cudaGetLastError() );
\r
194 bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
\r
195 cudaSafeCall( cudaGetLastError() );
\r
199 for (int i = 0; i < iters; ++i)
\r
201 bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
\r
202 cudaSafeCall( cudaGetLastError() );
\r
204 bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
\r
205 cudaSafeCall( cudaGetLastError() );
\r
209 cv::gpu::error("Unsupported channels count", __FILE__, __LINE__, "bilateral_filter_caller");
\r
213 cudaSafeCall( cudaDeviceSynchronize() );
\r
216 void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
\r
218 bilateral_filter_caller(disp, img, channels, iters, stream);
\r
221 void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
\r
223 bilateral_filter_caller(disp, img, channels, iters, stream);
\r
225 } // namespace bilateral_filter
\r
226 }}} // namespace cv { namespace gpu { namespace device
\r