modules/gpu/src/cuda/resize.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "internal_shared.hpp"\r
  44 #include "opencv2/gpu/device/border_interpolate.hpp"\r
  45 #include "opencv2/gpu/device/vec_traits.hpp"\r
  46 #include "opencv2/gpu/device/vec_math.hpp"\r
  47 #include "opencv2/gpu/device/saturate_cast.hpp"\r
  48 #include "opencv2/gpu/device/filters.hpp"\r
  49 # include <cfloat>\r
  50 \r
  51 namespace cv { namespace gpu { namespace device\r
  52 {\r
  53     namespace imgproc\r
  54     {\r
  55         template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
  56         {\r
  57             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
  58             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
  59 \r
  60             if (x < dst.cols && y < dst.rows)\r
  61             {\r
  62                 const float xcoo = x * fx;\r
  63                 const float ycoo = y * fy;\r
  64 \r
  65                 dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));\r
  66             }\r
  67         }\r
  68 \r
  69         template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
  70         {\r
  71             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
  72             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
  73 \r
  74             if (x < dst.cols && y < dst.rows)\r
  75             {\r
  76                 dst(y, x) = saturate_cast<T>(src(y, x));\r
  77             }\r
  78         }\r
  79 \r
  80         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream\r
  81         {\r
  82             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
  83             {\r
  84                 dim3 block(32, 8);\r
  85                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
  86 \r
  87                 BrdReplicate<T> brd(src.rows, src.cols);\r
  88                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
  89                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);\r
  90 \r
  91                 resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
  92                 cudaSafeCall( cudaGetLastError() );\r
  93             }\r
  94         };\r
  95 \r
  96         template <typename T> struct ResizeDispatcherStream<AreaFilter, T>\r
  97         {\r
  98             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
  99             {\r
 100                 dim3 block(32, 8);\r
 101                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 102 \r
 103                 BrdConstant<T> brd(src.rows, src.cols);\r
 104                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
 105                 AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
 106                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
 107                 cudaSafeCall( cudaGetLastError() );\r
 108                 if (stream == 0)\r
 109                     cudaSafeCall( cudaDeviceSynchronize() );\r
 110             }\r
 111         };\r
 112 \r
 113         template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>\r
 114         {\r
 115             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 116             {\r
 117                 dim3 block(32, 8);\r
 118                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 119 \r
 120                 BrdConstant<T> brd(src.rows, src.cols);\r
 121                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
 122                 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
 123                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
 124                 cudaSafeCall( cudaGetLastError() );\r
 125                 if (stream == 0)\r
 126                     cudaSafeCall( cudaDeviceSynchronize() );\r
 127             }\r
 128         };\r
 129 \r
 130         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream\r
 131         {\r
 132             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)\r
 133             {\r
 134                 dim3 block(32, 8);\r
 135                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 136 \r
 137                 BrdReplicate<T> brd(src.rows, src.cols);\r
 138                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
 139                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);\r
 140 \r
 141                 resize<<<grid, block>>>(filteredSrc, fx, fy, dst);\r
 142                 cudaSafeCall( cudaGetLastError() );\r
 143 \r
 144                 cudaSafeCall( cudaDeviceSynchronize() );\r
 145             }\r
 146         };\r
 147 \r
 148         #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \\r
 149             texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \\r
 150             struct tex_resize_ ## type ## _reader \\r
 151             { \\r
 152                 typedef type elem_type; \\r
 153                 typedef int index_type; \\r
 154                 const int xoff; \\r
 155                 const int yoff; \\r
 156                 __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \\r
 157                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \\r
 158                 { \\r
 159                     return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \\r
 160                 } \\r
 161             }; \\r
 162             template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \\r
 163             { \\r
 164                 static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \\r
 165                 { \\r
 166                     dim3 block(32, 8); \\r
 167                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
 168                     bindTexture(&tex_resize_ ## type, srcWhole); \\r
 169                     tex_resize_ ## type ## _reader texSrc(xoff, yoff); \\r
 170                     if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \\r
 171                     { \\r
 172                         Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \\r
 173                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
 174                     } \\r
 175                     else \\r
 176                     { \\r
 177                         BrdReplicate< type > brd(src.rows, src.cols); \\r
 178                         BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \\r
 179                         Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \\r
 180                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
 181                     } \\r
 182                     cudaSafeCall( cudaGetLastError() ); \\r
 183                     cudaSafeCall( cudaDeviceSynchronize() ); \\r
 184                 } \\r
 185             };\r
 186 \r
 187         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)\r
 188         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)\r
 189 \r
 190         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)\r
 191         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)\r
 192 \r
 193         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)\r
 194         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)\r
 195 \r
 196         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)\r
 197         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)\r
 198 \r
 199         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)\r
 200         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)\r
 201 \r
 202         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)\r
 203         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)\r
 204 \r
 205         #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX\r
 206 \r
 207         template <template <typename> class Filter, typename T> struct ResizeDispatcher\r
 208         {\r
 209             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 210             {\r
 211                 if (stream == 0)\r
 212                     ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);\r
 213                 else\r
 214                     ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);\r
 215             }\r
 216         };\r
 217 \r
 218         template <typename T> struct ResizeDispatcher<AreaFilter, T>\r
 219         {\r
 220             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 221             {\r
 222                 int iscale_x = round(fx);\r
 223                 int iscale_y = round(fy);\r
 224 \r
 225                 if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)\r
 226                     ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);\r
 227                 else\r
 228                     ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);\r
 229             }\r
 230         };\r
 231 \r
 232         template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, \r
 233             DevMem2Db dst, int interpolation, cudaStream_t stream)\r
 234         {\r
 235             typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);\r
 236 \r
 237             static const caller_t callers[4] =\r
 238             {\r
 239                 ResizeDispatcher<PointFilter, T>::call,\r
 240                 ResizeDispatcher<LinearFilter, T>::call,\r
 241                 ResizeDispatcher<CubicFilter, T>::call,\r
 242                 ResizeDispatcher<AreaFilter, T>::call\r
 243             };\r
 244 \r
 245             callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, \r
 246                 static_cast< DevMem2D_<T> >(dst), stream);\r
 247         }\r
 248 \r
 249         template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 250         //template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 251         template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 252         template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 253 \r
 254         //template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 255         //template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 256         //template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 257         //template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 258 \r
 259         template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 260         //template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 261         template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 262         template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 263 \r
 264         template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 265         //template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 266         template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 267         template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 268 \r
 269         //template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 270         //template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 271         //template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 272         //template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 273 \r
 274         template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 275         //template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 276         template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 277         template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 278     } // namespace imgproc\r
 279 }}} // namespace cv { namespace gpu { namespace device\r