modules/gpu/src/cuda/resize.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "internal_shared.hpp"\r
  44 #include "opencv2/gpu/device/border_interpolate.hpp"\r
  45 #include "opencv2/gpu/device/vec_traits.hpp"\r
  46 #include "opencv2/gpu/device/vec_math.hpp"\r
  47 #include "opencv2/gpu/device/saturate_cast.hpp"\r
  48 #include "opencv2/gpu/device/filters.hpp"\r
  49 # include <cfloat>\r
  50 \r
  51 namespace cv { namespace gpu { namespace device\r
  52 {\r
  53     namespace imgproc\r
  54     {\r
  55         template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
  56         {\r
  57             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
  58             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
  59 \r
  60             if (x < dst.cols && y < dst.rows)\r
  61             {\r
  62                 const float xcoo = x * fx;\r
  63                 const float ycoo = y * fy;\r
  64 \r
  65                 dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));\r
  66             }\r
  67         }\r
  68 \r
  69         template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)\r
  70         {\r
  71             const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
  72             const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
  73 \r
  74             if (x < dst.cols && y < dst.rows)\r
  75             {\r
  76                 dst(y, x) = saturate_cast<T>(src(y, x));\r
  77             }\r
  78         }\r
  79 \r
  80         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream\r
  81         {\r
  82             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
  83             {\r
  84                 dim3 block(32, 8);\r
  85                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
  86 \r
  87                 BrdReplicate<T> brd(src.rows, src.cols);\r
  88                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
  89                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);\r
  90 \r
  91                 resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
  92                 cudaSafeCall( cudaGetLastError() );\r
  93             }\r
  94         };\r
  95 \r
  96         template <typename T> struct ResizeDispatcherStream<AreaFilter, T>\r
  97         {\r
  98             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
  99             {\r
 100                 dim3 block(32, 8);\r
 101                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 102 \r
 103                 BrdConstant<T> brd(src.rows, src.cols);\r
 104                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
 105                 AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
 106                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
 107                 cudaSafeCall( cudaGetLastError() );\r
 108             }\r
 109         };\r
 110 \r
 111         template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>\r
 112         {\r
 113             static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 114             {\r
 115                 dim3 block(32, 8);\r
 116                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 117 \r
 118                 BrdConstant<T> brd(src.rows, src.cols);\r
 119                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
 120                 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
 121                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);\r
 122                 cudaSafeCall( cudaGetLastError() );\r
 123             }\r
 124         };\r
 125 \r
 126         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream\r
 127         {\r
 128             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)\r
 129             {\r
 130                 dim3 block(32, 8);\r
 131                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
 132 \r
 133                 BrdReplicate<T> brd(src.rows, src.cols);\r
 134                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);\r
 135                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);\r
 136 \r
 137                 resize<<<grid, block>>>(filteredSrc, fx, fy, dst);\r
 138                 cudaSafeCall( cudaGetLastError() );\r
 139 \r
 140                 cudaSafeCall( cudaDeviceSynchronize() );\r
 141             }\r
 142         };\r
 143 \r
 144         #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \\r
 145             texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \\r
 146             struct tex_resize_ ## type ## _reader \\r
 147             { \\r
 148                 typedef type elem_type; \\r
 149                 typedef int index_type; \\r
 150                 const int xoff; \\r
 151                 const int yoff; \\r
 152                 __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \\r
 153                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \\r
 154                 { \\r
 155                     return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \\r
 156                 } \\r
 157             }; \\r
 158             template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \\r
 159             { \\r
 160                 static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \\r
 161                 { \\r
 162                     dim3 block(32, 8); \\r
 163                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
 164                     bindTexture(&tex_resize_ ## type, srcWhole); \\r
 165                     tex_resize_ ## type ## _reader texSrc(xoff, yoff); \\r
 166                     if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \\r
 167                     { \\r
 168                         Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \\r
 169                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
 170                     } \\r
 171                     else \\r
 172                     { \\r
 173                         BrdReplicate< type > brd(src.rows, src.cols); \\r
 174                         BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \\r
 175                         Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \\r
 176                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \\r
 177                     } \\r
 178                     cudaSafeCall( cudaGetLastError() ); \\r
 179                     cudaSafeCall( cudaDeviceSynchronize() ); \\r
 180                 } \\r
 181             };\r
 182 \r
 183         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)\r
 184         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)\r
 185 \r
 186         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)\r
 187         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)\r
 188 \r
 189         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)\r
 190         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)\r
 191 \r
 192         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)\r
 193         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)\r
 194 \r
 195         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)\r
 196         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)\r
 197 \r
 198         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)\r
 199         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)\r
 200 \r
 201         #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX\r
 202 \r
 203         template <template <typename> class Filter, typename T> struct ResizeDispatcher\r
 204         {\r
 205             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 206             {\r
 207                 if (stream == 0)\r
 208                     ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);\r
 209                 else\r
 210                     ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);\r
 211             }\r
 212         };\r
 213 \r
 214         template <typename T> struct ResizeDispatcher<AreaFilter, T>\r
 215         {\r
 216             static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)\r
 217             {\r
 218                 int iscale_x = round(fx);\r
 219                 int iscale_y = round(fy);\r
 220 \r
 221                 if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)\r
 222                     ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);\r
 223                 else\r
 224                     ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);\r
 225             }\r
 226         };\r
 227 \r
 228         template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, \r
 229             DevMem2Db dst, int interpolation, cudaStream_t stream)\r
 230         {\r
 231             typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);\r
 232 \r
 233             static const caller_t callers[4] =\r
 234             {\r
 235                 ResizeDispatcher<PointFilter, T>::call,\r
 236                 ResizeDispatcher<LinearFilter, T>::call,\r
 237                 ResizeDispatcher<CubicFilter, T>::call,\r
 238                 ResizeDispatcher<AreaFilter, T>::call\r
 239             };\r
 240 \r
 241             callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, \r
 242                 static_cast< DevMem2D_<T> >(dst), stream);\r
 243         }\r
 244 \r
 245         template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 246         //template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 247         template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 248         template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 249 \r
 250         //template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 251         //template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 252         //template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 253         //template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 254 \r
 255         template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 256         //template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 257         template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 258         template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 259 \r
 260         template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 261         //template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 262         template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 263         template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 264 \r
 265         //template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 266         //template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 267         //template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 268         //template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 269 \r
 270         template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 271         //template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 272         template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 273         template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
 274     } // namespace imgproc\r
 275 }}} // namespace cv { namespace gpu { namespace device\r