performance test for INTER_AREA resize has been added.
[profile/ivi/opencv.git] / modules / gpu / src / cuda / resize.cu
1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
2 //\r
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
4 //\r
5 //  By downloading, copying, installing or using the software you agree to this license.\r
6 //  If you do not agree to this license, do not download, install,\r
7 //  copy or use the software.\r
8 //\r
9 //\r
10 //                           License Agreement\r
11 //                For Open Source Computer Vision Library\r
12 //\r
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
15 // Third party copyrights are property of their respective owners.\r
16 //\r
17 // Redistribution and use in source and binary forms, with or without modification,\r
18 // are permitted provided that the following conditions are met:\r
19 //\r
20 //   * Redistribution's of source code must retain the above copyright notice,\r
21 //     this list of conditions and the following disclaimer.\r
22 //\r
23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
24 //     this list of conditions and the following disclaimer in the documentation\r
25 //     and/or other materials provided with the distribution.\r
26 //\r
27 //   * The name of the copyright holders may not be used to endorse or promote products\r
28 //     derived from this software without specific prior written permission.\r
29 //\r
30 // This software is provided by the copyright holders and contributors "as is" and\r
31 // any express or implied warranties, including, but not limited to, the implied\r
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
34 // indirect, incidental, special, exemplary, or consequential damages\r
35 // (including, but not limited to, procurement of substitute goods or services;\r
36 // loss of use, data, or profits; or business interruption) however caused\r
37 // and on any theory of liability, whether in contract, strict liability,\r
38 // or tort (including negligence or otherwise) arising in any way out of\r
39 // the use of this software, even if advised of the possibility of such damage.\r
40 //\r
41 //M*/\r
42 \r
43 #include "internal_shared.hpp"\r
44 #include "opencv2/gpu/device/border_interpolate.hpp"\r
45 #include "opencv2/gpu/device/vec_traits.hpp"\r
46 #include "opencv2/gpu/device/vec_math.hpp"\r
47 #include "opencv2/gpu/device/saturate_cast.hpp"\r
48 #include "opencv2/gpu/device/filters.hpp"\r
49 # include <cfloat>\r
50 \r
51 namespace cv { namespace gpu { namespace device\r
52 {\r
53     namespace imgproc\r
54     {\r
55         template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
56         {\r
57             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
58             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
59 \r
60             if (x < dst.cols && y < dst.rows)\r
61             {\r
62                 const float xcoo = x * fx;\r
63                 const float ycoo = y * fy;\r
64 \r
65                 dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));\r
66             }\r
67         }\r
68 \r
69         template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
70         {\r
71             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
72             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
73 \r
74             if (x < dst.cols && y < dst.rows)\r
75             {\r
76                 dst(y, x) = saturate_cast<T>(src(y, x));\r
77             }\r
78         }\r
79 \r
80         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream\r
81         {\r
82             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
83             {\r
84                 dim3 block(32, 8);\r
85                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
86 \r
87                 BrdReplicate<T> brd(src.rows, src.cols);\r
88                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
89                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);\r
90 \r
91                 resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
92                 cudaSafeCall( cudaGetLastError() );\r
93             }\r
94         };\r
95 \r
96         template <typename T> struct ResizeDispatcherStream<AreaFilter, T>\r
97         {\r
98             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
99             {\r
100                 dim3 block(32, 8);\r
101                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
102 \r
103                 BrdConstant<T> brd(src.rows, src.cols);\r
104                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
105                 AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
106                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
107                 cudaSafeCall( cudaGetLastError() );\r
108                 if (stream == 0)\r
109                     cudaSafeCall( cudaDeviceSynchronize() );\r
110             }\r
111         };\r
112 \r
113         template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>\r
114         {\r
115             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
116             {\r
117                 dim3 block(32, 8);\r
118                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
119 \r
120                 BrdConstant<T> brd(src.rows, src.cols);\r
121                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
122                 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
123                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
124                 cudaSafeCall( cudaGetLastError() );\r
125                 if (stream == 0)\r
126                     cudaSafeCall( cudaDeviceSynchronize() );\r
127             }\r
128         };\r
129 \r
130         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream\r
131         {\r
132             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)\r
133             {\r
134                 dim3 block(32, 8);\r
135                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
136 \r
137                 BrdReplicate<T> brd(src.rows, src.cols);\r
138                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
139                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);\r
140 \r
141                 resize<<<grid, block>>>(filteredSrc, fx, fy, dst);\r
142                 cudaSafeCall( cudaGetLastError() );\r
143 \r
144                 cudaSafeCall( cudaDeviceSynchronize() );\r
145             }\r
146         };\r
147 \r
148         #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \\r
149             texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \\r
150             struct tex_resize_ ## type ## _reader \\r
151             { \\r
152                 typedef type elem_type; \\r
153                 typedef int index_type; \\r
154                 const int xoff; \\r
155                 const int yoff; \\r
156                 __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \\r
157                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \\r
158                 { \\r
159                     return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \\r
160                 } \\r
161             }; \\r
162             template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \\r
163             { \\r
164                 static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \\r
165                 { \\r
166                     dim3 block(32, 8); \\r
167                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
168                     bindTexture(&tex_resize_ ## type, srcWhole); \\r
169                     tex_resize_ ## type ## _reader texSrc(xoff, yoff); \\r
170                     if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \\r
171                     { \\r
172                         Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \\r
173                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
174                     } \\r
175                     else \\r
176                     { \\r
177                         BrdReplicate< type > brd(src.rows, src.cols); \\r
178                         BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \\r
179                         Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \\r
180                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
181                     } \\r
182                     cudaSafeCall( cudaGetLastError() ); \\r
183                     cudaSafeCall( cudaDeviceSynchronize() ); \\r
184                 } \\r
185             };\r
186 \r
187         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)\r
188         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)\r
189 \r
190         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)\r
191         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)\r
192 \r
193         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)\r
194         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)\r
195 \r
196         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)\r
197         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)\r
198 \r
199         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)\r
200         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)\r
201 \r
202         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)\r
203         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)\r
204 \r
205         #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX\r
206 \r
207         template <template <typename> class Filter, typename T> struct ResizeDispatcher\r
208         {\r
209             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
210             {\r
211                 if (stream == 0)\r
212                     ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);\r
213                 else\r
214                     ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);\r
215             }\r
216         };\r
217 \r
218         template <typename T> struct ResizeDispatcher<AreaFilter, T>\r
219         {\r
220             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
221             {\r
222                 int iscale_x = round(fx);\r
223                 int iscale_y = round(fy);\r
224 \r
225                 if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)\r
226                     ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);\r
227                 else\r
228                     ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);\r
229             }\r
230         };\r
231 \r
232         template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, \r
233             DevMem2Db dst, int interpolation, cudaStream_t stream)\r
234         {\r
235             typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);\r
236 \r
237             static const caller_t callers[4] =\r
238             {\r
239                 ResizeDispatcher<PointFilter, T>::call,\r
240                 ResizeDispatcher<LinearFilter, T>::call,\r
241                 ResizeDispatcher<CubicFilter, T>::call,\r
242                 ResizeDispatcher<AreaFilter, T>::call\r
243             };\r
244 \r
245             callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, \r
246                 static_cast< DevMem2D_<T> >(dst), stream);\r
247         }\r
248 \r
249         template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
250         //template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
251         template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
252         template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
253 \r
254         //template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
255         //template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
256         //template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
257         //template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
258 \r
259         template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
260         //template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
261         template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
262         template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
263 \r
264         template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
265         //template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
266         template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
267         template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
268 \r
269         //template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
270         //template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
271         //template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
272         //template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
273 \r
274         template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
275         //template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
276         template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
277         template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
278     } // namespace imgproc\r
279 }}} // namespace cv { namespace gpu { namespace device\r