added dual tvl1 optical flow gpu implementation
[profile/ivi/opencv.git] / modules / gpu / src / cuda / bilateral_filter.cu
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 //   * Redistribution's of source code must retain the above copyright notice,
22 //     this list of conditions and the following disclaimer.
23 //
24 //   * Redistribution's in binary form must reproduce the above copyright notice,
25 //     this list of conditions and the following disclaimer in the documentation
26 //     and/or other materials provided with the distribution.
27 //
28 //   * The name of the copyright holders may not be used to endorse or promote products
29 //     derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #if !defined CUDA_DISABLER
45
46 #include "internal_shared.hpp"
47
48 #include "opencv2/gpu/device/vec_traits.hpp"
49 #include "opencv2/gpu/device/vec_math.hpp"
50 #include "opencv2/gpu/device/border_interpolate.hpp"
51
52 using namespace cv::gpu;
53
54 typedef unsigned char uchar;
55 typedef unsigned short ushort;
56
57 //////////////////////////////////////////////////////////////////////////////////
58 /// Bilateral filtering
59
60 namespace cv { namespace gpu { namespace device
61 {
62     namespace imgproc
63     {
// L1 norm helpers: each overload returns the sum of the absolute values of the
// components of its argument (a scalar float, or a float2 / float3 / float4).
// Components are accumulated in x, y, z, w order.
__device__ __forceinline__ float norm_l1(const float& a)
{
    return ::fabsf(a);
}

__device__ __forceinline__ float norm_l1(const float2& a)
{
    float total = ::fabsf(a.x);
    total = total + ::fabsf(a.y);
    return total;
}

__device__ __forceinline__ float norm_l1(const float3& a)
{
    float total = ::fabsf(a.x);
    total = total + ::fabsf(a.y);
    total = total + ::fabsf(a.z);
    return total;
}

__device__ __forceinline__ float norm_l1(const float4& a)
{
    float total = ::fabsf(a.x);
    total = total + ::fabsf(a.y);
    total = total + ::fabsf(a.z);
    total = total + ::fabsf(a.w);
    return total;
}
68
// Returns the square of a scalar value (a * a).
__device__ __forceinline__ float sqr(const float& a)
{
    const float squared = a * a;
    return squared;
}
70
// Bilateral filter kernel: each thread computes one output pixel.
//
// The thread derives its pixel (x, y) from blockIdx/blockDim/threadIdx and
// returns immediately when that pixel lies outside src. Otherwise it walks the
// ksz x ksz window whose top-left corner is (x - ksz/2, y - ksz/2), ignores
// taps whose squared distance to the centre exceeds (ksz/2)^2, and gives every
// remaining tap the weight
//
//     exp(dist2 * sigma_spatial2_inv_half + sqr(norm_l1(tap - centre)) * sigma_color2_inv_half)
//
// The weighted sum of taps divided by the sum of weights is converted back to
// T with saturate_cast and stored in dst(y, x).
//
// Parameters:
//   src                      input image (rows/cols are read from it)
//   dst                      output image, written at (y, x)
//   b                        border functor; b.at(row, col, data, step) is used
//                            for taps when the window is not known to lie fully
//                            inside src (presumably it resolves out-of-range
//                            coordinates according to the border mode)
//   ksz                      window side length in pixels
//   sigma_spatial2_inv_half  multiplier applied to the squared spatial distance
//   sigma_color2_inv_half    multiplier applied to the squared colour distance
template<typename T, typename B>
__global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
{
    typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so some threads have no pixel.
    if (x >= src.cols || y >= src.rows)
        return;

    const value_type center = saturate_cast<value_type>(src(y, x));

    value_type weightedSum = VecTraits<value_type>::all(0);
    float weightTotal = 0;

    const int radius = ksz / 2;
    const float radius2 = (float)(radius * radius);

    // Half-open window [x0, x1) x [y0, y1).
    const int x0 = x - radius;
    const int y0 = y - radius;
    const int x1 = x0 + ksz;
    const int y1 = y0 + ksz;

    const bool windowInside = x0 >= 0 && y0 >= 0 && x1 < src.cols && y1 < src.rows;

    if (windowInside)
    {
        // Fast path: every tap is a valid pixel, read src directly.
        for (int cy = y0; cy < y1; ++cy)
        {
            const int dy2 = (y - cy) * (y - cy);

            for (int cx = x0; cx < x1; ++cx)
            {
                const float space2 = (x - cx) * (x - cx) + dy2;

                // Only taps within the circle of the given radius contribute.
                if (space2 <= radius2)
                {
                    const value_type value = saturate_cast<value_type>(src(cy, cx));

                    const float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);

                    weightedSum = weightedSum + weight * value;
                    weightTotal = weightTotal + weight;
                }
            }
        }
    }
    else
    {
        // Slow path: the window may leave the image, fetch taps through the border functor.
        for (int cy = y0; cy < y1; ++cy)
        {
            const int dy2 = (y - cy) * (y - cy);

            for (int cx = x0; cx < x1; ++cx)
            {
                const float space2 = (x - cx) * (x - cx) + dy2;

                if (space2 <= radius2)
                {
                    const value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));

                    const float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);

                    weightedSum = weightedSum + weight * value;
                    weightTotal = weightTotal + weight;
                }
            }
        }
    }

    dst(y, x) = saturate_cast<T>(weightedSum / weightTotal);
}
128
// Host-side launcher for bilateral_kernel<T, B<T> >.
//
// Reinterprets src/dst as images of element type T, builds the border functor
// B<T> from the source dimensions, converts the two sigmas into the exponent
// multipliers -0.5 / sigma^2 expected by the kernel, and launches one thread
// per pixel using 32x8 blocks.
//
// Parameters:
//   src, dst        input and output images (byte-typed views, cast to T here)
//   kernel_size     window side length forwarded to the kernel as ksz
//   sigma_spatial   spatial sigma; must be non-zero (it is used as a divisor)
//   sigma_color     colour sigma; must be non-zero (it is used as a divisor)
//   stream          CUDA stream the kernel is enqueued on
//
// The kernel is enqueued on `stream`. When `stream` is 0 the function also
// waits for the device to finish before returning; for any other stream it
// returns right after the launch and the caller synchronises on that stream.
// CUDA errors are reported through cudaSafeCall.
template<typename T, template <typename> class B>
void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
{
    const dim3 block (32, 8);
    const dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));

    B<T> b(src.rows, src.cols);

    const float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
    const float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);

    cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );

    // Launch on the caller's stream. The launch previously omitted the stream
    // argument, so the kernel always ran on the default stream and a non-zero
    // `stream` was silently ignored.
    bilateral_kernel<<<grid, block, 0, stream>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
    cudaSafeCall ( cudaGetLastError () );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
147
// Entry point for the bilateral filter on element type T.
//
// Picks the bilateral_caller<T, Border> specialisation that corresponds to
// borderMode and forwards every other argument to it unchanged.
//
// borderMode is used directly as an index into the table below
// (0: BrdReflect101, 1: BrdReplicate, 2: BrdConstant, 3: BrdReflect, 4: BrdWrap).
// No range check is performed here, so the caller must pass a value in [0, 4].
template<typename T>
void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
{
    typedef void (*launcher_t)(const PtrStepSzb&, PtrStepSzb, int, float, float, cudaStream_t);

    static const launcher_t launchers[] =
    {
        bilateral_caller<T, BrdReflect101>,
        bilateral_caller<T, BrdReplicate>,
        bilateral_caller<T, BrdConstant>,
        bilateral_caller<T, BrdReflect>,
        bilateral_caller<T, BrdWrap>
    };

    const launcher_t launch = launchers[borderMode];
    launch(src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
}
163     }
164 }}}
165
166
// Explicit instantiations of bilateral_filter_gpu<T> for the element types this
// translation unit provides. The macro expands to the instantiation declaration
// without its terminating semicolon; each use below supplies it.
// Types that are not instantiated are kept as commented-out lines.
#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
    template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t)

// unsigned char: 1, 3 and 4 channels
OCV_INSTANTIATE_BILATERAL_FILTER(uchar);
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2);
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3);
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4);

// signed char: disabled
//OCV_INSTANTIATE_BILATERAL_FILTER(schar);
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2);
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3);
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4);

// short: 1, 3 and 4 channels
OCV_INSTANTIATE_BILATERAL_FILTER(short);
//OCV_INSTANTIATE_BILATERAL_FILTER(short2);
OCV_INSTANTIATE_BILATERAL_FILTER(short3);
OCV_INSTANTIATE_BILATERAL_FILTER(short4);

// unsigned short: 1, 3 and 4 channels
OCV_INSTANTIATE_BILATERAL_FILTER(ushort);
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2);
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3);
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4);

// int: disabled
//OCV_INSTANTIATE_BILATERAL_FILTER(int);
//OCV_INSTANTIATE_BILATERAL_FILTER(int2);
//OCV_INSTANTIATE_BILATERAL_FILTER(int3);
//OCV_INSTANTIATE_BILATERAL_FILTER(int4);

// float: 1, 3 and 4 channels
OCV_INSTANTIATE_BILATERAL_FILTER(float);
//OCV_INSTANTIATE_BILATERAL_FILTER(float2);
OCV_INSTANTIATE_BILATERAL_FILTER(float3);
OCV_INSTANTIATE_BILATERAL_FILTER(float4);
199
200
201 #endif /* CUDA_DISABLER */