modules/gpu/src/cuda/resize.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #if !defined CUDA_DISABLER
  44
  45 #include "internal_shared.hpp"
  46 #include "opencv2/gpu/device/border_interpolate.hpp"
  47 #include "opencv2/gpu/device/vec_traits.hpp"
  48 #include "opencv2/gpu/device/vec_math.hpp"
  49 #include "opencv2/gpu/device/saturate_cast.hpp"
  50 #include "opencv2/gpu/device/filters.hpp"
  51 #include <cfloat>
  52 #include <opencv2/gpu/device/scan.hpp>
  53
  54 namespace cv { namespace gpu { namespace device
  55 {
  56     namespace imgproc
  57     {
  58         template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
  59         {
  60             const int x = blockDim.x * blockIdx.x + threadIdx.x;
  61             const int y = blockDim.y * blockIdx.y + threadIdx.y;
  62
  63             if (x < dst.cols && y < dst.rows)
  64             {
  65                 const float xcoo = x * fx;
  66                 const float ycoo = y * fy;
  67
  68                 dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
  69             }
  70         }
  71
  72         template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
  73         {
  74             const int x = blockDim.x * blockIdx.x + threadIdx.x;
  75             const int y = blockDim.y * blockIdx.y + threadIdx.y;
  76
  77             if (x < dst.cols && y < dst.rows)
  78             {
  79                 dst(y, x) = saturate_cast<T>(src(y, x));
  80             }
  81         }
  82
  83         template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
  84         {
  85             static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
  86             {
  87                 dim3 block(32, 8);
  88                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
  89
  90                 BrdReplicate<T> brd(src.rows, src.cols);
  91                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
  92                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
  93
  94                 resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
  95                 cudaSafeCall( cudaGetLastError() );
  96             }
  97         };
  98
  99         template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
 100         {
 101             static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
 102             {
 103                 dim3 block(32, 8);
 104                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 105
 106                 BrdConstant<T> brd(src.rows, src.cols);
 107                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
 108                 AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
 109                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
 110                 cudaSafeCall( cudaGetLastError() );
 111                 if (stream == 0)
 112                     cudaSafeCall( cudaDeviceSynchronize() );
 113             }
 114         };
 115
 116         template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
 117         {
 118             static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
 119             {
 120                 dim3 block(32, 8);
 121                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 122                 BrdConstant<T> brd(src.rows, src.cols);
 123                 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
 124                 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
 125                 resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
 126                 cudaSafeCall( cudaGetLastError() );
 127                 if (stream == 0)
 128                     cudaSafeCall( cudaDeviceSynchronize() );
 129             }
 130         };
 131
 132         template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
 133         {
 134             static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
 135             {
 136                 (void)srcWhole;
 137                 (void)xoff;
 138                 (void)yoff;
 139
 140                 dim3 block(32, 8);
 141                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 142
 143                 BrdReplicate<T> brd(src.rows, src.cols);
 144                 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
 145                 Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
 146
 147                 resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
 148                 cudaSafeCall( cudaGetLastError() );
 149
 150                 cudaSafeCall( cudaDeviceSynchronize() );
 151             }
 152         };
 153
 154         #define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
 155             texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
 156             struct tex_resize_ ## type ## _reader \
 157             { \
 158                 typedef type elem_type; \
 159                 typedef int index_type; \
 160                 const int xoff; \
 161                 const int yoff; \
 162                 __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
 163                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
 164                 { \
 165                     return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
 166                 } \
 167             }; \
 168             template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
 169             { \
 170                 static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
 171                 { \
 172                     dim3 block(32, 8); \
 173                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
 174                     bindTexture(&tex_resize_ ## type, srcWhole); \
 175                     tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
 176                     if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
 177                     { \
 178                         Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
 179                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
 180                     } \
 181                     else \
 182                     { \
 183                         BrdReplicate< type > brd(src.rows, src.cols); \
 184                         BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
 185                         Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
 186                         resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
 187                     } \
 188                     cudaSafeCall( cudaGetLastError() ); \
 189                     cudaSafeCall( cudaDeviceSynchronize() ); \
 190                 } \
 191             };
 192
 193         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
 194         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
 195
 196         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
 197         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
 198
 199         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
 200         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
 201
 202         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
 203         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
 204
 205         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
 206         //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
 207
 208         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
 209         OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
 210
 211         #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
 212
 213         template <template <typename> class Filter, typename T> struct ResizeDispatcher
 214         {
 215             static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
 216             {
 217                 if (stream == 0)
 218                     ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
 219                 else
 220                     ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
 221             }
 222         };
 223
 224         template <typename T> struct ResizeDispatcher<AreaFilter, T>
 225         {
 226             static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
 227             {
 228                 (void)srcWhole;
 229                 (void)xoff;
 230                 (void)yoff;
 231                 int iscale_x = (int)round(fx);
 232                 int iscale_y = (int)round(fy);
 233
 234                 if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
 235                     ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
 236                 else
 237                     ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
 238             }
 239         };
 240
 241         template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
 242             PtrStepSzb dst, int interpolation, cudaStream_t stream)
 243         {
 244             typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
 245
 246             static const caller_t callers[4] =
 247             {
 248                 ResizeDispatcher<PointFilter, T>::call,
 249                 ResizeDispatcher<LinearFilter, T>::call,
 250                 ResizeDispatcher<CubicFilter, T>::call,
 251                 ResizeDispatcher<AreaFilter, T>::call
 252             };
 253             // chenge to linear if area interpolation upscaling
 254             if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
 255                 interpolation = 1;
 256
 257             callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
 258                 static_cast< PtrStepSz<T> >(dst), stream);
 259         }
 260
 261         template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 262         //template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 263         template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 264         template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 265
 266         //template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 267         //template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 268         //template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 269         //template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 270
 271         template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 272         //template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 273         template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 274         template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 275
 276         template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 277         //template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 278         template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 279         template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 280
 281         //template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 282         //template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 283         //template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 284         //template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 285
 286         template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 287         //template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 288         template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 289         template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
 290
 291         template<typename T> struct scan_traits{};
 292
 293         template<> struct scan_traits<uchar>
 294         {
 295             typedef float scan_line_type;
 296         };
 297
 298     } // namespace imgproc
 299 }}} // namespace cv { namespace gpu { namespace device
 300
 301
 302 #endif /* CUDA_DISABLER */