From 21c4753fedecdf674058e8d69d3184a2f8c5a021 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Fri, 23 Aug 2013 13:46:29 +0400
Subject: [PATCH] used global memory access for up-scaling

---
Review notes (free text placed after the "---" separator, ahead of the diff):

  What the visible part of this hunk does
    * Drops the inner "namespace imgproc" wrapper, so resize_nearest,
      resize_linear and resize sit directly in cv::gpu::device and lose one
      level of indentation.
    * Changes the kernel parameter order from (src, fx, fy, dst) to
      (src, dst, fy, fx).
    * Leaves the kernel bodies otherwise unchanged: each thread computes one
      destination pixel (dst_x, dst_y), is guarded by
      "dst_x < dst.cols && dst_y < dst.rows", and samples src at
      (dst_y * fy, dst_x * fx). resize_linear blends the four neighbouring
      source pixels and clamps only the x2/y2 reads to the last column/row.

  Reconstruction caveats
    * This copy had been flattened onto a few wrapped lines and every
      "<...>" span had been stripped. Line breaks, indentation and blank
      context lines below were re-inferred from the +/- markers and the brace
      structure - TODO confirm against the upstream commit before applying.
    * Template parameter lists were restored from usage: "T" is used as the
      element type ("T src_reg") and "Ptr2D" as a parameter type. The first
      argument of TypeVec ("float") and the "class"/"typename" keywords are
      assumed - TODO confirm.
    * The hunk is truncated after the final "-        template" line; the
      remainder lies outside the reviewed range.

 modules/gpu/src/cuda/resize.cu | 677 ++++++++++++++++++++++-------------------
 modules/gpu/src/resize.cpp     |  26 +-
 2 files changed, 380 insertions(+), 323 deletions(-)

diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu
index 6ecb7eb..dc9f462 100644
--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -52,368 +52,431 @@
 
 namespace cv { namespace gpu { namespace device
 {
-    namespace imgproc
+    // kernels
+
+    template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
     {
-        template <typename T> __global__ void resize_nearest(const PtrStep<T> src, const float fx, const float fy, PtrStepSz<T> dst)
-        {
-            const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
 
-            if (dst_x < dst.cols && dst_y < dst.rows)
-            {
-                const float src_x = dst_x * fx;
-                const float src_y = dst_y * fy;
+        if (dst_x < dst.cols && dst_y < dst.rows)
+        {
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-                dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
-            }
+            dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
         }
+    }
 
-        template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, const float fx, const float fy, PtrStepSz<T> dst)
-        {
-            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
+    template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
+    {
+        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
 
-            const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
 
-            if (dst_x < dst.cols && dst_y < dst.rows)
-            {
-                const float src_x = dst_x * fx;
-                const float src_y = dst_y * fy;
+        if (dst_x < dst.cols && dst_y < dst.rows)
+        {
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-                work_type out = VecTraits<work_type>::all(0);
+            work_type out = VecTraits<work_type>::all(0);
 
-                const int x1 = __float2int_rd(src_x);
-                const int y1 = __float2int_rd(src_y);
-                const int x2 = x1 + 1;
-                const int y2 = y1 + 1;
-                const int x2_read = ::min(x2, src.cols - 1);
-                const int y2_read = ::min(y2, src.rows - 1);
+            const int x1 = __float2int_rd(src_x);
+            const int y1 = __float2int_rd(src_y);
+            const int x2 = x1 + 1;
+            const int y2 = y1 + 1;
+            const int x2_read = ::min(x2, src.cols - 1);
+            const int y2_read = ::min(y2, src.rows - 1);
 
-                T src_reg = src(y1, x1);
-                out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
+            T src_reg = src(y1, x1);
+            out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
 
-                src_reg = src(y1, x2_read);
-                out = out + src_reg * ((src_x - x1) * (y2 - src_y));
+            src_reg = src(y1, x2_read);
+            out = out + src_reg * ((src_x - x1) * (y2 - src_y));
 
-                src_reg = src(y2_read, x1);
-                out = out + src_reg * ((x2 - src_x) * (src_y - y1));
+            src_reg = src(y2_read, x1);
+            out = out + src_reg * ((x2 - src_x) * (src_y - y1));
 
-                src_reg = src(y2_read, x2_read);
-                out = out + src_reg * ((src_x - x1) * (src_y - y1));
+            src_reg = src(y2_read, x2_read);
+            out = out + src_reg * ((src_x - x1) * (src_y - y1));
 
-                dst(dst_y, dst_x) = saturate_cast<T>(out);
-            }
+            dst(dst_y, dst_x) = saturate_cast<T>(out);
         }
+    }
 
-        template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, const float fx, const float fy, PtrStepSz<T> dst)
-        {
-            const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
-            const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
+    template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx)
+    {
+        const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
 
-            if (dst_x < dst.cols && dst_y < dst.rows)
-            {
-                const float src_x = dst_x * fx;
-                const float src_y = dst_y * fy;
+        if (dst_x < dst.cols && dst_y < dst.rows)
+        {
+            const float src_x = dst_x * fx;
+            const float src_y = dst_y * fy;
 
-                dst(dst_y, dst_x) = src(src_y, src_x);
-            }
+            dst(dst_y, dst_x) = src(src_y, src_x);
         }
+    }
 
-        template