1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "opencv2/gpu/device/saturate_cast.hpp"
44 #include "opencv2/gpu/device/transform.hpp"
45 #include "opencv2/gpu/device/functional.hpp"
46 #include "opencv2/gpu/device/type_traits.hpp"
48 namespace cv { namespace gpu { namespace device
50 void writeScalar(const uchar*);
51 void writeScalar(const schar*);
52 void writeScalar(const ushort*);
53 void writeScalar(const short int*);
54 void writeScalar(const int*);
55 void writeScalar(const float*);
56 void writeScalar(const double*);
57 void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
58 void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
61 namespace cv { namespace gpu { namespace device
63 template <typename T> struct shift_and_sizeof;
64 template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
65 template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
66 template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
67 template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
68 template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
69 template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
70 template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
72 ///////////////////////////////////////////////////////////////////////////
73 ////////////////////////////////// CopyTo /////////////////////////////////
74 ///////////////////////////////////////////////////////////////////////////
76 template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
79 cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
81 cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
84 void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
86 typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
91 copyToWithMask<unsigned char>,
92 copyToWithMask<unsigned short>,
98 copyToWithMask<double>
101 tab[elemSize1](src, dst, cn, mask, colorMask, stream);
104 ///////////////////////////////////////////////////////////////////////////
105 ////////////////////////////////// SetTo //////////////////////////////////
106 ///////////////////////////////////////////////////////////////////////////
108 __constant__ uchar scalar_8u[4];
109 __constant__ schar scalar_8s[4];
110 __constant__ ushort scalar_16u[4];
111 __constant__ short scalar_16s[4];
112 __constant__ int scalar_32s[4];
113 __constant__ float scalar_32f[4];
114 __constant__ double scalar_64f[4];
116 template <typename T> __device__ __forceinline__ T readScalar(int i);
117 template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
118 template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
119 template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
120 template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
121 template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
122 template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
123 template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
125 void writeScalar(const uchar* vals)
127 cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
129 void writeScalar(const schar* vals)
131 cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
133 void writeScalar(const ushort* vals)
135 cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
137 void writeScalar(const short* vals)
139 cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
141 void writeScalar(const int* vals)
143 cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
145 void writeScalar(const float* vals)
147 cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
149 void writeScalar(const double* vals)
151 cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
155 __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
157 size_t x = blockIdx.x * blockDim.x + threadIdx.x;
158 size_t y = blockIdx.y * blockDim.y + threadIdx.y;
160 if ((x < cols * channels ) && (y < rows))
162 size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
163 mat[idx] = readScalar<T>(x % channels);
168 __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
170 size_t x = blockIdx.x * blockDim.x + threadIdx.x;
171 size_t y = blockIdx.y * blockDim.y + threadIdx.y;
173 if ((x < cols * channels ) && (y < rows))
174 if (mask[y * step_mask + x / channels] != 0)
176 size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
177 mat[idx] = readScalar<T>(x % channels);
180 template <typename T>
181 void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
185 dim3 threadsPerBlock(32, 8, 1);
186 dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
188 set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
189 cudaSafeCall( cudaGetLastError() );
192 cudaSafeCall ( cudaDeviceSynchronize() );
195 template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
196 template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
197 template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
198 template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
199 template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
200 template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
201 template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
203 template <typename T>
204 void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
208 dim3 threadsPerBlock(32, 8, 1);
209 dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
211 set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
212 cudaSafeCall( cudaGetLastError() );
215 cudaSafeCall ( cudaDeviceSynchronize() );
218 template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
219 template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
220 template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
221 template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
222 template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
223 template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
224 template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
226 ///////////////////////////////////////////////////////////////////////////
227 //////////////////////////////// ConvertTo ////////////////////////////////
228 ///////////////////////////////////////////////////////////////////////////
230 template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
232 Convertor(S alpha_, S beta_) : alpha(alpha_), beta(beta_) {}
234 __device__ __forceinline__ D operator()(typename TypeTraits<T>::ParameterType src) const
236 return saturate_cast<D>(alpha * src + beta);
244 template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
247 template <typename F> struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits<F>
249 enum { smart_shift = 8 };
251 template <typename F> struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits<F>
253 enum { smart_shift = 4 };
255 template <typename F> struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits<F>
257 enum { smart_block_dim_y = 8 };
258 enum { smart_shift = 4 };
261 template <typename F> struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits<F>
263 enum { smart_shift = 4 };
265 template <typename F> struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits<F>
267 enum { smart_shift = 2 };
270 template <typename F> struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits<F>
272 enum { smart_block_dim_y = 8 };
273 enum { smart_shift = 4 };
275 template <typename F> struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits<F>
277 enum { smart_block_dim_y = 8 };
278 enum { smart_shift = 2 };
281 template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
286 template <typename T, typename D, typename S> struct TransformFunctorTraits< Convertor<T, D, S> > : detail::ConvertTraits< Convertor<T, D, S> >
290 template<typename T, typename D, typename S>
291 void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
293 cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
294 cudaSafeCall( cudaSetDoubleForDevice(&beta) );
295 Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
296 cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
299 #if defined __clang__
300 # pragma clang diagnostic push
301 # pragma clang diagnostic ignored "-Wmissing-declarations"
304 void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
306 typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
308 static const caller_t tab[7][7] =
311 cvt_<uchar, uchar, float>,
312 cvt_<uchar, schar, float>,
313 cvt_<uchar, ushort, float>,
314 cvt_<uchar, short, float>,
315 cvt_<uchar, int, float>,
316 cvt_<uchar, float, float>,
317 cvt_<uchar, double, double>
320 cvt_<schar, uchar, float>,
321 cvt_<schar, schar, float>,
322 cvt_<schar, ushort, float>,
323 cvt_<schar, short, float>,
324 cvt_<schar, int, float>,
325 cvt_<schar, float, float>,
326 cvt_<schar, double, double>
329 cvt_<ushort, uchar, float>,
330 cvt_<ushort, schar, float>,
331 cvt_<ushort, ushort, float>,
332 cvt_<ushort, short, float>,
333 cvt_<ushort, int, float>,
334 cvt_<ushort, float, float>,
335 cvt_<ushort, double, double>
338 cvt_<short, uchar, float>,
339 cvt_<short, schar, float>,
340 cvt_<short, ushort, float>,
341 cvt_<short, short, float>,
342 cvt_<short, int, float>,
343 cvt_<short, float, float>,
344 cvt_<short, double, double>
347 cvt_<int, uchar, float>,
348 cvt_<int, schar, float>,
349 cvt_<int, ushort, float>,
350 cvt_<int, short, float>,
351 cvt_<int, int, double>,
352 cvt_<int, float, double>,
353 cvt_<int, double, double>
356 cvt_<float, uchar, float>,
357 cvt_<float, schar, float>,
358 cvt_<float, ushort, float>,
359 cvt_<float, short, float>,
360 cvt_<float, int, float>,
361 cvt_<float, float, float>,
362 cvt_<float, double, double>
365 cvt_<double, uchar, double>,
366 cvt_<double, schar, double>,
367 cvt_<double, ushort, double>,
368 cvt_<double, short, double>,
369 cvt_<double, int, double>,
370 cvt_<double, float, double>,
371 cvt_<double, double, double>
375 caller_t func = tab[sdepth][ddepth];
376 func(src, dst, alpha, beta, stream);
379 #if defined __clang__
380 # pragma clang diagnostic pop
382 }}} // namespace cv { namespace gpu { namespace device