modules/gpu/src/cuda/pyr_up.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #if !defined CUDA_DISABLER
  44
  45 #include "internal_shared.hpp"
  46 #include "opencv2/gpu/device/border_interpolate.hpp"
  47 #include "opencv2/gpu/device/vec_traits.hpp"
  48 #include "opencv2/gpu/device/vec_math.hpp"
  49 #include "opencv2/gpu/device/saturate_cast.hpp"
  50
  51 namespace cv { namespace gpu { namespace device
  52 {
  53     namespace imgproc
  54     {
  55         template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
  56         {
  57             typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
  58
  59             const int x = blockIdx.x * blockDim.x + threadIdx.x;
  60             const int y = blockIdx.y * blockDim.y + threadIdx.y;
  61
  62             __shared__ sum_t s_srcPatch[10][10];
  63             __shared__ sum_t s_dstPatch[20][16];
  64
  65             if (threadIdx.x < 10 && threadIdx.y < 10)
  66             {
  67                 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
  68                 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
  69
  70                 srcx = ::abs(srcx);
  71                 srcx = ::min(src.cols - 1, srcx);
  72
  73                 srcy = ::abs(srcy);
  74                 srcy = ::min(src.rows - 1, srcy);
  75
  76                 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
  77             }
  78
  79             __syncthreads();
  80
  81             sum_t sum = VecTraits<sum_t>::all(0);
  82
  83             const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
  84             const int oddFlag  = static_cast<int>((threadIdx.x & 1) != 0);
  85             const bool eveny = ((threadIdx.y & 1) == 0);
  86             const int tidx = threadIdx.x;
  87
  88             if (eveny)
  89             {
  90                 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
  91                 sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
  92                 sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx    ) >> 1)];
  93                 sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
  94                 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
  95             }
  96
  97             s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
  98
  99             if (threadIdx.y < 2)
 100             {
 101                 sum = VecTraits<sum_t>::all(0);
 102
 103                 if (eveny)
 104                 {
 105                     sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
 106                     sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
 107                     sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
 108                     sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
 109                     sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
 110                 }
 111
 112                 s_dstPatch[threadIdx.y][threadIdx.x] = sum;
 113             }
 114
 115             if (threadIdx.y > 13)
 116             {
 117                 sum = VecTraits<sum_t>::all(0);
 118
 119                 if (eveny)
 120                 {
 121                     sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
 122                     sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
 123                     sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx    ) >> 1)];
 124                     sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
 125                     sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
 126                 }
 127
 128                 s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
 129             }
 130
 131             __syncthreads();
 132
 133             sum = VecTraits<sum_t>::all(0);
 134
 135             const int tidy = threadIdx.y;
 136
 137             sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
 138             sum = sum + 0.25f   * s_dstPatch[2 + tidy - 1][threadIdx.x];
 139             sum = sum + 0.375f  * s_dstPatch[2 + tidy    ][threadIdx.x];
 140             sum = sum + 0.25f   * s_dstPatch[2 + tidy + 1][threadIdx.x];
 141             sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
 142
 143             if (x < dst.cols && y < dst.rows)
 144                 dst(y, x) = saturate_cast<T>(4.0f * sum);
 145         }
 146
 147         template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
 148         {
 149             const dim3 block(16, 16);
 150             const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 151
 152             pyrUp<<<grid, block, 0, stream>>>(src, dst);
 153             cudaSafeCall( cudaGetLastError() );
 154
 155             if (stream == 0)
 156                 cudaSafeCall( cudaDeviceSynchronize() );
 157         }
 158
 159         template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
 160         {
 161             pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
 162         }
 163
 164         template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 165         //template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 166         template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 167         template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 168
 169         //template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 170         //template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 171         //template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 172         //template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 173
 174         template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 175         //template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 176         template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 177         template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 178
 179         template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 180         //template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 181         template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 182         template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 183
 184         //template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 185         //template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 186         //template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 187         //template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 188
 189         template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 190         //template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 191         template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 192         template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 193     } // namespace imgproc
 194 }}} // namespace cv { namespace gpu { namespace device
 195
 196 #endif /* CUDA_DISABLER */