/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
\r
#include <cassert>
#include <cfloat>
#include <cmath>

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
\r
51 namespace cv { namespace gpu { namespace device
\r
// Generic resize kernel: each thread produces at most one destination pixel.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) writes dst(y, x). The
// launchers in this file round the grid up, so threads that land outside dst
// simply do nothing.
//
// src    - callable source reader, sampled as src(row, col) with float coordinates
// fx, fy - factors that map a destination column / row to a source column / row
// dst    - destination image; dst.cols and dst.rows bound the work
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < dst.cols && y < dst.rows)
    {
        // Project the destination pixel back into source coordinates.
        const float xcoo = x * fx;
        const float ycoo = y * fy;

        dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
    }
}
\r
// Area-resize kernel: each thread produces at most one destination pixel.
//
// Unlike resize(), the source reader is called with the integer destination
// coordinates themselves. fx and fy are not read in the kernel body; the
// launchers in this file hand them to the filter object's constructor instead.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) writes dst(y, x). Threads
// outside dst do nothing.
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;

    if (x < dst.cols && y < dst.rows)
    {
        dst(y, x) = saturate_cast<T>(src(y, x));
    }
}
\r
// Launches the generic resize kernel on a caller-supplied stream.
//
// The source is wrapped in a replicate-border reader and then in Filter, which
// is constructed with the scale factors. The launch uses 32x8 thread blocks and
// a grid rounded up to cover dst. Only launch errors are checked here; this
// function does not synchronize with the stream.
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
{
    static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        BrdReplicate<T> brd(src.rows, src.cols);
        BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
        Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);

        resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
        cudaSafeCall( cudaGetLastError() );
    }
};
\r
// AreaFilter specialization of the stream dispatcher.
//
// Differs from the generic version in two ways: the source is wrapped in a
// constant-border reader (BrdConstant) rather than a replicate-border one, and
// the resize_area kernel is launched. Uses 32x8 thread blocks and a grid
// rounded up to cover dst. Only launch errors are checked; no synchronization.
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
{
    static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        BrdConstant<T> brd(src.rows, src.cols);
        BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
        AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);

        resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
        cudaSafeCall( cudaGetLastError() );
    }
};
\r
// IntegerAreaFilter specialization of the stream dispatcher.
//
// Same shape as the AreaFilter specialization: constant-border reader,
// resize_area kernel, 32x8 thread blocks, grid rounded up to cover dst. Only
// launch errors are checked; no synchronization.
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
{
    static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        BrdConstant<T> brd(src.rows, src.cols);
        BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
        IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);

        resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
        cudaSafeCall( cudaGetLastError() );
    }
};
\r
// Generic blocking dispatcher: launches resize without a stream argument and
// then waits for the device to finish.
//
// srcWhole, xoff and yoff are accepted for signature compatibility with the
// texture-backed specializations but are not used here. The filter is
// constructed from the border reader alone (no fx / fy arguments).
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
{
    static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

        BrdReplicate<T> brd(src.rows, src.cols);
        BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
        Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

        resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
        cudaSafeCall( cudaGetLastError() );

        // Blocking path: surface execution errors before returning.
        cudaSafeCall( cudaDeviceSynchronize() );
    }
};
\r
// Generates, for one element type:
//   * a file-scope 2D texture reference tex_resize_<type> declared with point
//     filter mode and clamp address mode;
//   * a reader functor tex_resize_<type>_reader that fetches from that texture
//     at (x + xoff, y + yoff);
//   * a ResizeDispatcherNonStream<Filter, type> specialization that binds
//     srcWhole to the texture, launches resize on it and then synchronizes.
//
// When src covers all of srcWhole the filter reads the texture directly;
// otherwise the reader is additionally wrapped in a replicate-border reader
// sized to src.
//
// Only /* */ comments may be used inside the macro body: a // comment would
// swallow the line-continuation backslash.
//
// NOTE(review): texture references (texture<>, tex2D on a reference) belong to
// the legacy CUDA texture API; confirm toolkit support before reuse.
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
    texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
    struct tex_resize_ ## type ## _reader \
    { \
        typedef type elem_type; \
        typedef int index_type; \
        int xoff; \
        int yoff; \
        __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
        __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
        { \
            return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
        } \
    }; \
    template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
    { \
        static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
        { \
            dim3 block(32, 8); \
            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
            bindTexture(&tex_resize_ ## type, srcWhole); \
            tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
            if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
            { \
                Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
                resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
            } \
            else \
            { \
                BrdReplicate< type > brd(src.rows, src.cols); \
                BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
                Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
                resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
            } \
            cudaSafeCall( cudaGetLastError() ); \
            cudaSafeCall( cudaDeviceSynchronize() ); \
        } \
    };
\r
// Element types that get a texture-backed blocking dispatcher. The
// commented-out entries are disabled in the original and are kept that way.
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)

//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)

OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)

OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)

//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)

OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)

#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
\r
// Top-level dispatcher for a given filter: picks between the blocking
// (NonStream) and the stream-ordered (Stream) implementation.
//
// The stream path ignores srcWhole, xoff and yoff; only the blocking path
// receives them.
template <template <typename> class Filter, typename T> struct ResizeDispatcher
{
    static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
    {
        // NOTE(review): the branch condition was lost from this copy of the
        // file; `stream == 0` (default stream -> blocking path) is reconstructed
        // from the NonStream / Stream naming. Confirm against upstream.
        if (stream == 0)
            ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
        else
            ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
    }
};
\r
// AreaFilter specialization of the top-level dispatcher.
//
// Always uses the stream dispatcher (srcWhole, xoff and yoff are unused).
// When both scale factors are whole numbers the IntegerAreaFilter variant is
// selected, otherwise the general AreaFilter one.
template <typename T> struct ResizeDispatcher<AreaFilter, T>
{
    static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
    {
        // Explicit casts: round() yields a floating-point value.
        const int iscale_x = static_cast<int>(round(fx));
        const int iscale_y = static_cast<int>(round(fy));

        // FLT_MIN is the smallest positive normalized float, so this is in
        // effect an "fx and fy are exactly integral" test, not a tolerance.
        if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
            ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
        else
            ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
    }
};
\r
// Host entry point: resizes src into dst using the dispatcher selected by
// `interpolation`.
//
// src, srcWhole, dst - untyped images, reinterpreted as DevMem2D_<T>
// xoff, yoff         - forwarded to the dispatcher together with srcWhole
// fx, fy             - scale factors forwarded to the dispatcher
// interpolation      - index into the table below: 0 = PointFilter,
//                      1 = LinearFilter, 2 = CubicFilter, 3 = AreaFilter
//                      (presumably the cv::INTER_* values; confirm at the caller)
// stream             - forwarded to the dispatcher
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
    DevMem2Db dst, int interpolation, cudaStream_t stream)
{
    typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);

    static const caller_t callers[4] =
    {
        ResizeDispatcher<PointFilter, T>::call,
        ResizeDispatcher<LinearFilter, T>::call,
        ResizeDispatcher<CubicFilter, T>::call,
        ResizeDispatcher<AreaFilter, T>::call
    };

    // An out-of-range mode would read a garbage function pointer from the table.
    assert(interpolation >= 0 && interpolation < static_cast<int>(sizeof(callers) / sizeof(callers[0])));

    callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
        static_cast< DevMem2D_<T> >(dst), stream);
}
\r
// Explicit instantiations of resize_gpu. The commented-out element types are
// disabled in the original and are kept that way.
template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

//template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

//template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);

template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
\r
274 } // namespace imgproc
\r
275 }}} // namespace cv { namespace gpu { namespace device
\r