1 /*M///////////////////////////////////////////////////////////////////////////////////////
\r
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
\r
5 // By downloading, copying, installing or using the software you agree to this license.
\r
6 // If you do not agree to this license, do not download, install,
\r
7 // copy or use the software.
\r
10 // License Agreement
\r
11 // For Open Source Computer Vision Library
\r
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
\r
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
\r
15 // Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
\r
16 // Third party copyrights are property of their respective owners.
\r
18 // Redistribution and use in source and binary forms, with or without modification,
\r
19 // are permitted provided that the following conditions are met:
\r
21 // * Redistribution's of source code must retain the above copyright notice,
\r
22 // this list of conditions and the following disclaimer.
\r
24 // * Redistribution's in binary form must reproduce the above copyright notice,
\r
25 // this list of conditions and the following disclaimer in the documentation
\r
26 // and/or other materials provided with the distribution.
\r
28 // * The name of the copyright holders may not be used to endorse or promote products
\r
29 // derived from this software without specific prior written permission.
\r
31 // This software is provided by the copyright holders and contributors "as is" and
\r
32 // any express or bpied warranties, including, but not limited to, the bpied
\r
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
\r
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
\r
35 // indirect, incidental, special, exemplary, or consequential damages
\r
36 // (including, but not limited to, procurement of substitute goods or services;
\r
37 // loss of use, data, or profits; or business interruption) however caused
\r
38 // and on any theory of liability, whether in contract, strict liability,
\r
39 // or tort (including negligence or otherwise) arising in any way out of
\r
40 // the use of this software, even if advised of the possibility of such damage.
\r
44 #include "internal_shared.hpp"
\r
46 #include "opencv2/gpu/device/vec_traits.hpp"
\r
47 #include "opencv2/gpu/device/vec_math.hpp"
\r
48 #include "opencv2/gpu/device/border_interpolate.hpp"
\r
50 using namespace cv::gpu;
\r
52 typedef unsigned char uchar;
\r
53 typedef unsigned short ushort;
\r
55 //////////////////////////////////////////////////////////////////////////////////
\r
56 /// Bilateral filtering
\r
58 namespace cv { namespace gpu { namespace device
\r
62 __device__ __forceinline__ float norm_l1(const float& a) { return ::fabs(a); }
\r
63 __device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }
\r
64 __device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }
\r
65 __device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }
\r
67 __device__ __forceinline__ float sqr(const float& a) { return a * a; }
\r
69 template<typename T, typename B>
\r
70 __global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
\r
72 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
\r
74 int x = threadIdx.x + blockIdx.x * blockDim.x;
\r
75 int y = threadIdx.y + blockIdx.y * blockDim.y;
\r
77 if (x >= src.cols || y >= src.rows)
\r
80 value_type center = saturate_cast<value_type>(src(y, x));
\r
82 value_type sum1 = VecTraits<value_type>::all(0);
\r
86 float r2 = (float)(r * r);
\r
88 int tx = x - r + ksz;
\r
89 int ty = y - r + ksz;
\r
91 if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)
\r
93 for (int cy = y - r; cy < ty; ++cy)
\r
94 for (int cx = x - r; cx < tx; ++cx)
\r
96 float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
\r
100 value_type value = saturate_cast<value_type>(src(cy, cx));
\r
102 float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
\r
103 sum1 = sum1 + weight * value;
\r
104 sum2 = sum2 + weight;
\r
109 for (int cy = y - r; cy < ty; ++cy)
\r
110 for (int cx = x - r; cx < tx; ++cx)
\r
112 float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
\r
116 value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));
\r
118 float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
\r
120 sum1 = sum1 + weight * value;
\r
121 sum2 = sum2 + weight;
\r
124 dst(y, x) = saturate_cast<T>(sum1 / sum2);
\r
127 template<typename T, template <typename> class B>
\r
128 void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
\r
130 dim3 block (32, 8);
\r
131 dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));
\r
133 B<T> b(src.rows, src.cols);
\r
135 float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
\r
136 float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
\r
138 cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
\r
139 bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
\r
140 cudaSafeCall ( cudaGetLastError () );
\r
143 cudaSafeCall( cudaDeviceSynchronize() );
\r
146 template<typename T>
\r
147 void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
\r
149 typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);
\r
151 static caller_t funcs[] =
\r
153 bilateral_caller<T, BrdReflect101>,
\r
154 bilateral_caller<T, BrdReplicate>,
\r
155 bilateral_caller<T, BrdConstant>,
\r
156 bilateral_caller<T, BrdReflect>,
\r
157 bilateral_caller<T, BrdWrap>,
\r
159 funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
\r
165 #define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
\r
166 template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
\r
168 OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
\r
169 //OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
\r
170 OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
\r
171 OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)
\r
173 //OCV_INSTANTIATE_BILATERAL_FILTER(schar)
\r
174 //OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
\r
175 //OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
\r
176 //OCV_INSTANTIATE_BILATERAL_FILTER(schar4)
\r
178 OCV_INSTANTIATE_BILATERAL_FILTER(short)
\r
179 //OCV_INSTANTIATE_BILATERAL_FILTER(short2)
\r
180 OCV_INSTANTIATE_BILATERAL_FILTER(short3)
\r
181 OCV_INSTANTIATE_BILATERAL_FILTER(short4)
\r
183 OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
\r
184 //OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
\r
185 OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
\r
186 OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)
\r
188 //OCV_INSTANTIATE_BILATERAL_FILTER(int)
\r
189 //OCV_INSTANTIATE_BILATERAL_FILTER(int2)
\r
190 //OCV_INSTANTIATE_BILATERAL_FILTER(int3)
\r
191 //OCV_INSTANTIATE_BILATERAL_FILTER(int4)
\r
193 OCV_INSTANTIATE_BILATERAL_FILTER(float)
\r
194 //OCV_INSTANTIATE_BILATERAL_FILTER(float2)
\r
195 OCV_INSTANTIATE_BILATERAL_FILTER(float3)
\r
196 OCV_INSTANTIATE_BILATERAL_FILTER(float4)
\r