added GPU bilateral filter + tests
[profile/ivi/opencv.git] / modules / gpu / src / cuda / bilateral_filter.cu
1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
2 //\r
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
4 //\r
5 //  By downloading, copying, installing or using the software you agree to this license.\r
6 //  If you do not agree to this license, do not download, install,\r
7 //  copy or use the software.\r
8 //\r
9 //\r
10 //                           License Agreement\r
11 //                For Open Source Computer Vision Library\r
12 //\r
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
15 // Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.\r
16 // Third party copyrights are property of their respective owners.\r
17 //\r
18 // Redistribution and use in source and binary forms, with or without modification,\r
19 // are permitted provided that the following conditions are met:\r
20 //\r
21 //   * Redistribution's of source code must retain the above copyright notice,\r
22 //     this list of conditions and the following disclaimer.\r
23 //\r
24 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
25 //     this list of conditions and the following disclaimer in the documentation\r
26 //     and/or other materials provided with the distribution.\r
27 //\r
28 //   * The name of the copyright holders may not be used to endorse or promote products\r
29 //     derived from this software without specific prior written permission.\r
30 //\r
31 // This software is provided by the copyright holders and contributors "as is" and\r
32 // any express or bpied warranties, including, but not limited to, the bpied\r
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
34 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
35 // indirect, incidental, special, exemplary, or consequential damages\r
36 // (including, but not limited to, procurement of substitute goods or services;\r
37 // loss of use, data, or profits; or business interruption) however caused\r
38 // and on any theory of liability, whether in contract, strict liability,\r
39 // or tort (including negligence or otherwise) arising in any way out of\r
40 // the use of this software, even if advised of the possibility of such damage.\r
41 //\r
42 //M*/\r
43 \r
44 #include "internal_shared.hpp"\r
45 \r
46 #include "opencv2/gpu/device/vec_traits.hpp"\r
47 #include "opencv2/gpu/device/vec_math.hpp"\r
48 #include "opencv2/gpu/device/border_interpolate.hpp"\r
49 \r
50 using namespace cv::gpu;\r
51 \r
52 typedef unsigned char uchar;\r
53 typedef unsigned short ushort;\r
54 \r
55 //////////////////////////////////////////////////////////////////////////////////\r
56 /// Bilateral filtering\r
57 \r
58 namespace cv { namespace gpu { namespace device\r
59 {\r
60     namespace imgproc\r
61     {\r
62         __device__ __forceinline__ float norm_l1(const float& a)  { return ::fabs(a); }\r
63         __device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }\r
64         __device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }\r
65         __device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }\r
66 \r
67         __device__ __forceinline__ float sqr(const float& a)  { return a * a; }\r
68 \r
69         template<typename T, typename B> \r
70         __global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)\r
71         {\r
72             typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;\r
73             \r
74             int x = threadIdx.x + blockIdx.x * blockDim.x;\r
75             int y = threadIdx.y + blockIdx.y * blockDim.y;\r
76 \r
77             if (x >= src.cols || y >= src.rows)\r
78                 return;\r
79 \r
80             value_type center = saturate_cast<value_type>(src(y, x));\r
81 \r
82             value_type sum1 = VecTraits<value_type>::all(0);\r
83             float sum2 = 0;\r
84 \r
85             int r = ksz / 2;\r
86             float r2 = (float)(r * r);\r
87 \r
88             int tx = x - r + ksz;\r
89             int ty = y - r + ksz;\r
90 \r
91             if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)\r
92             {\r
93                 for (int cy = y - r; cy < ty; ++cy)\r
94                     for (int cx = x - r; cx < tx; ++cx)\r
95                     {\r
96                         float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);\r
97                         if (space2 > r2)\r
98                             continue;\r
99 \r
100                         value_type value = saturate_cast<value_type>(src(cy, cx));\r
101 \r
102                         float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);\r
103                         sum1 = sum1 + weight * value;\r
104                         sum2 = sum2 + weight;\r
105                     }\r
106             }\r
107             else\r
108             {\r
109                 for (int cy = y - r; cy < ty; ++cy)\r
110                     for (int cx = x - r; cx < tx; ++cx)\r
111                     {\r
112                         float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);\r
113                         if (space2 > r2)\r
114                             continue;\r
115 \r
116                         value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));\r
117 \r
118                         float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);\r
119 \r
120                         sum1 = sum1 + weight * value;\r
121                         sum2 = sum2 + weight;\r
122                     }\r
123             }\r
124             dst(y, x) = saturate_cast<T>(sum1 / sum2);\r
125         }\r
126 \r
127         template<typename T, template <typename> class B>\r
128         void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)\r
129         {\r
130             dim3 block (32, 8);\r
131             dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));\r
132 \r
133             B<T> b(src.rows, src.cols);\r
134 \r
135             float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);\r
136              float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);\r
137 \r
138             cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );\r
139             bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);\r
140             cudaSafeCall ( cudaGetLastError () );\r
141 \r
142             if (stream == 0)\r
143                 cudaSafeCall( cudaDeviceSynchronize() );\r
144         }\r
145 \r
146         template<typename T>\r
147         void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)\r
148         {\r
149             typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);\r
150 \r
151             static caller_t funcs[] = \r
152             {\r
153                 bilateral_caller<T, BrdReflect101>,\r
154                 bilateral_caller<T, BrdReplicate>,\r
155                 bilateral_caller<T, BrdConstant>,\r
156                 bilateral_caller<T, BrdReflect>,\r
157                 bilateral_caller<T, BrdWrap>,\r
158             };\r
159             funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);\r
160         }\r
161     }\r
162 }}}\r
163 \r
164 \r
165 #define OCV_INSTANTIATE_BILATERAL_FILTER(T) \\r
166     template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);\r
167 \r
168 OCV_INSTANTIATE_BILATERAL_FILTER(uchar)\r
169 //OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)\r
170 OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)\r
171 OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)\r
172 \r
173 //OCV_INSTANTIATE_BILATERAL_FILTER(schar)\r
174 //OCV_INSTANTIATE_BILATERAL_FILTER(schar2)\r
175 //OCV_INSTANTIATE_BILATERAL_FILTER(schar3)\r
176 //OCV_INSTANTIATE_BILATERAL_FILTER(schar4)\r
177 \r
178 OCV_INSTANTIATE_BILATERAL_FILTER(short)\r
179 //OCV_INSTANTIATE_BILATERAL_FILTER(short2)\r
180 OCV_INSTANTIATE_BILATERAL_FILTER(short3)\r
181 OCV_INSTANTIATE_BILATERAL_FILTER(short4)\r
182 \r
183 OCV_INSTANTIATE_BILATERAL_FILTER(ushort)\r
184 //OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)\r
185 OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)\r
186 OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)\r
187 \r
188 //OCV_INSTANTIATE_BILATERAL_FILTER(int)\r
189 //OCV_INSTANTIATE_BILATERAL_FILTER(int2)\r
190 //OCV_INSTANTIATE_BILATERAL_FILTER(int3)\r
191 //OCV_INSTANTIATE_BILATERAL_FILTER(int4)\r
192 \r
193 OCV_INSTANTIATE_BILATERAL_FILTER(float)\r
194 //OCV_INSTANTIATE_BILATERAL_FILTER(float2)\r
195 OCV_INSTANTIATE_BILATERAL_FILTER(float3)\r
196 OCV_INSTANTIATE_BILATERAL_FILTER(float4)\r