1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
44 #if !defined CUDA_DISABLER
46 #include "internal_shared.hpp"
48 #include "opencv2/gpu/device/vec_traits.hpp"
49 #include "opencv2/gpu/device/vec_math.hpp"
50 #include "opencv2/gpu/device/border_interpolate.hpp"
52 using namespace cv::gpu;
// Shorthand names for the unsigned char / unsigned short element types.
54 typedef unsigned char uchar;
55 typedef unsigned short ushort;
57 //////////////////////////////////////////////////////////////////////////////////
58 /// Bilateral filtering
60 namespace cv { namespace gpu { namespace device
// L1 norm overloads: the sum of the absolute values of every component of a
// scalar or of a 2-, 3- or 4-component float vector. Device-only helpers.
__device__ __forceinline__ float norm_l1(const float& a)
{
    return ::fabs(a);
}

__device__ __forceinline__ float norm_l1(const float2& a)
{
    float total = ::fabs(a.x);
    total = total + ::fabs(a.y);
    return total;
}

__device__ __forceinline__ float norm_l1(const float3& a)
{
    float total = ::fabs(a.x);
    total = total + ::fabs(a.y);
    total = total + ::fabs(a.z);
    return total;
}

__device__ __forceinline__ float norm_l1(const float4& a)
{
    // Components are accumulated in x, y, z, w order.
    float total = ::fabs(a.x);
    total = total + ::fabs(a.y);
    total = total + ::fabs(a.z);
    total = total + ::fabs(a.w);
    return total;
}
// Returns the square of a scalar. Device-only helper.
__device__ __forceinline__ float sqr(const float& a)
{
    const float squared = a * a;
    return squared;
}
// Bilateral filter kernel. Each thread derives one pixel position (x, y) from
// its 2D block/thread indices and writes one filtered value to dst(y, x).
//
//   src                     - input image (rows, cols, data and step are read)
//   dst                     - output image
//   b                       - border handler; its at(y, x, data, step) is used for
//                             windows that are not known to lie inside the image
//   ksz                     - window size parameter
//   sigma_spatial2_inv_half - multiplier applied to the squared pixel distance
//   sigma_color2_inv_half   - multiplier applied to the squared L1 colour distance
//
// NOTE(review): the declarations of `r`, `tx`, `ty` and `sum2`, any use of `r2`,
// and the statements that separate the two loop nests are not visible in this
// excerpt; comments below only describe what is shown.
71 template<typename T, typename B>
72 __global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
// Accumulation type: TypeVec<float, cn> with the same channel count (cn) as T.
74 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
// Pixel coordinates handled by this thread.
76 int x = threadIdx.x + blockIdx.x * blockDim.x;
77 int y = threadIdx.y + blockIdx.y * blockDim.y;
// Guard for threads that fall outside the image (the guarded statement is not
// visible here; presumably an early return).
79 if (x >= src.cols || y >= src.rows)
// Centre pixel, converted to the accumulation type.
82 value_type center = saturate_cast<value_type>(src(y, x));
// Weighted sum of window values, started at zero in every channel.
84 value_type sum1 = VecTraits<value_type>::all(0);
// Squared radius as a float.
88 float r2 = (float)(r * r);
// Condition under which the loops below read src(cy, cx) directly: the window
// origin is non-negative and tx / ty are below the image width / height.
93 if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)
// Window walk over cy in [y - r, ty) and cx in [x - r, tx), direct reads.
95 for (int cy = y - r; cy < ty; ++cy)
96 for (int cx = x - r; cx < tx; ++cx)
// Squared Euclidean distance from the centre pixel.
98 float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
102 value_type value = saturate_cast<value_type>(src(cy, cx));
// weight = exp(space2 * spatial coefficient + (L1 colour distance)^2 * colour coefficient)
104 float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
105 sum1 = sum1 + weight * value;
106 sum2 = sum2 + weight;
// Same window walk, but every read goes through the border handler `b`
// (presumably the branch taken when the condition above is false).
111 for (int cy = y - r; cy < ty; ++cy)
112 for (int cx = x - r; cx < tx; ++cx)
114 float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
118 value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));
120 float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
122 sum1 = sum1 + weight * value;
123 sum2 = sum2 + weight;
// Normalise by the accumulated weight and convert back to the element type T.
126 dst(y, x) = saturate_cast<T>(sum1 / sum2);
// Host-side launcher for bilateral_kernel<T, B<T> >.
//
//   src, dst      - byte-typed image views, cast to PtrStepSz<T> for the launch
//   kernel_size   - forwarded to the kernel as its `ksz` argument
//   sigma_spatial - spatial sigma; passed to the kernel as -0.5 / sigma_spatial^2
//   sigma_color   - colour sigma; passed to the kernel as -0.5 / sigma_color^2
//   stream        - CUDA stream the kernel is enqueued on
//
// NOTE(review): the declaration of `block` and whatever precedes the final
// cudaDeviceSynchronize() call are not visible in this excerpt.
129 template<typename T, template <typename> class B>
130 void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
// One thread per pixel: grid dimensions come from divUp(image size, block size).
133 dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));
// Border handler constructed from the image dimensions.
135 B<T> b(src.rows, src.cols);
// Precompute -1 / (2 * sigma^2) so the kernel only multiplies inside exp().
137 float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
138 float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
// Request the L1-preferring cache configuration for this kernel instantiation.
140 cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
// Enqueue on the caller's stream (no dynamic shared memory). Without the
// explicit stream argument the kernel would always run on the default stream
// and the `stream` parameter would be ignored.
141 bilateral_kernel<<<grid, block, 0, stream>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
// Surface launch-configuration errors immediately.
142 cudaSafeCall ( cudaGetLastError () );
// Wait for the device so execution errors are reported here.
145 cudaSafeCall( cudaDeviceSynchronize() );
// Entry point of the bilateral filter for element type T: selects the
// bilateral_caller instantiation that matches `borderMode` and forwards all
// other arguments to it unchanged.
//
// NOTE(review): the template header that introduces T is not visible in this
// excerpt. `borderMode` is used directly as an index into `funcs` with no range
// check in the visible code, so callers must pass a value that is a valid index.
149 void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
// Signature shared by every bilateral_caller instantiation in the table.
151 typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);
// Dispatch table indexed by borderMode, in the order listed below.
153 static caller_t funcs[] =
155 bilateral_caller<T, BrdReflect101>,
156 bilateral_caller<T, BrdReplicate>,
157 bilateral_caller<T, BrdConstant>,
158 bilateral_caller<T, BrdReflect>,
159 bilateral_caller<T, BrdWrap>,
161 funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
// Explicit instantiations of bilateral_filter_gpu<T>. The macro expands to one
// `template void ...bilateral_filter_gpu<T>(...)` explicit instantiation per type.
// Enabled: uchar, uchar3, uchar4, short, short3, short4, ushort, ushort3,
// ushort4, float, float3, float4. The *2 variants and every schar / int
// variant are kept below but commented out.
167 #define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
168 template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
170 OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
171 //OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
172 OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
173 OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)
175 //OCV_INSTANTIATE_BILATERAL_FILTER(schar)
176 //OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
177 //OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
178 //OCV_INSTANTIATE_BILATERAL_FILTER(schar4)
180 OCV_INSTANTIATE_BILATERAL_FILTER(short)
181 //OCV_INSTANTIATE_BILATERAL_FILTER(short2)
182 OCV_INSTANTIATE_BILATERAL_FILTER(short3)
183 OCV_INSTANTIATE_BILATERAL_FILTER(short4)
185 OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
186 //OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
187 OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
188 OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)
190 //OCV_INSTANTIATE_BILATERAL_FILTER(int)
191 //OCV_INSTANTIATE_BILATERAL_FILTER(int2)
192 //OCV_INSTANTIATE_BILATERAL_FILTER(int3)
193 //OCV_INSTANTIATE_BILATERAL_FILTER(int4)
195 OCV_INSTANTIATE_BILATERAL_FILTER(float)
196 //OCV_INSTANTIATE_BILATERAL_FILTER(float2)
197 OCV_INSTANTIATE_BILATERAL_FILTER(float3)
198 OCV_INSTANTIATE_BILATERAL_FILTER(float4)
201 #endif /* CUDA_DISABLER */