From: Vladislav Vinogradov Date: Wed, 31 Aug 2011 11:42:54 +0000 (+0000) Subject: implemented gpu::remap for all types X-Git-Tag: accepted/2.0/20130307.220821~1943 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=47d68f6967c7e3a9992cdb9ab77329407f0092fb;p=profile%2Fivi%2Fopencv.git implemented gpu::remap for all types --- diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp index e3a3503..232783f 100644 --- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp +++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp @@ -66,6 +66,9 @@ namespace cv template struct DevMem2D_ { + typedef T elem_type; + typedef int index_type; + int cols; int rows; T* data; @@ -79,8 +82,7 @@ namespace cv template explicit DevMem2D_(const DevMem2D_& d) : cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {} - - typedef T elem_type; + enum { elem_size = sizeof(elem_type) }; __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } @@ -89,6 +91,9 @@ namespace cv __CV_GPU_HOST_DEVICE__ operator T*() const { return data; } + __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } + __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } + #if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) thrust::device_ptr begin() const { return thrust::device_ptr(data); } thrust::device_ptr end() const { return thrust::device_ptr(data) + cols * rows; } @@ -97,19 +102,24 @@ namespace cv template struct PtrStep_ { + typedef T elem_type; + typedef int index_type; + T* data; size_t step; PtrStep_() : data(0), step(0) {} PtrStep_(const DevMem2D_& mem) : data(mem.data), step(mem.step) {} - typedef T elem_type; enum { elem_size = sizeof(elem_type) }; __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); } __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); } + __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } + __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } + #if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) thrust::device_ptr begin() const { return thrust::device_ptr(data); } #endif @@ -124,7 +134,10 @@ namespace cv PtrStep_::step /= PtrStep_::elem_size; } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_::data + y * PtrStep_::step; } - __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_::data + y * PtrStep_::step; } + __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_::data + y * PtrStep_::step; } + + __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; } + __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } }; typedef DevMem2D_ DevMem2D; diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index fd63ff4..254c888 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -596,8 +596,9 @@ namespace cv ////////////////////////////// Image processing ////////////////////////////// //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation. - //! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type - CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap); + //! supports CV_32FC1 map type + CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, + int interpolation, int borderMode = BORDER_CONSTANT, const Scalar& borderValue = Scalar()); //! Does mean shift filtering on GPU. CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, @@ -761,10 +762,10 @@ namespace cv CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null()); //! smoothes the source image and downsamples it - CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); + CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); //! upsamples the source image and then smoothes it - CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); + CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); //! performs linear blending of two images //! to avoid accuracy errors sum of weigths shouldn't be very close to zero diff --git a/modules/gpu/src/cuda/filters.cu b/modules/gpu/src/cuda/filters.cu index fd67fb9..3384249 100644 --- a/modules/gpu/src/cuda/filters.cu +++ b/modules/gpu/src/cuda/filters.cu @@ -242,9 +242,9 @@ namespace filter_krnls { const T* srcCol = src.ptr() + x; - sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol); - sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol); - sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol); + sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol, src.step); + sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol, src.step); + sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol, src.step); __syncthreads(); @@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace filters dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); - B b(src.rows, src.step); + B b(src.rows); if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1)) { diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index be46937..b5f3b6b 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -44,6 +44,8 @@ #include "opencv2/gpu/device/border_interpolate.hpp" #include "opencv2/gpu/device/vec_traits.hpp" #include "opencv2/gpu/device/vec_math.hpp" +#include "opencv2/gpu/device/saturate_cast.hpp" +#include "opencv2/gpu/device/utility.hpp" using namespace cv::gpu; using namespace cv::gpu::device; @@ -51,109 +53,119 @@ using namespace cv::gpu::device; /////////////////////////////////// Remap /////////////////////////////////////////////// namespace cv { namespace gpu { namespace imgproc { - texture tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap); + // cudaAddressModeClamp == BrdReplicate + /*texture tex_remap_uchar_LinearFilter(0, cudaFilterModeLinear, cudaAddressModeClamp); - __global__ void remap_1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int height) + __global__ void remap_uchar_LinearFilter(const PtrStepf mapx, const PtrStepf mapy, DevMem2D dst) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x < width && y < height) - { - int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/ + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; - float xcoo = mapx[idx]; - float ycoo = mapy[idx]; + if (x < dst.cols && y < dst.rows) + { + const float xcoo = mapx.ptr(y)[x]; + const float ycoo = mapy.ptr(y)[x]; - out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)); + dst.ptr(y)[x] = 255.0f * tex2D(tex_remap_uchar_LinearFilter, xcoo, ycoo); } - } + }*/ - __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy, - size_t map_step, uchar* dst, size_t dst_step, int width, int height) - { + template __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_ dst) + { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x < width && y < height) + if (x < dst.cols && y < dst.rows) { - const int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/ - - const float xcoo = mapx[idx]; - const float ycoo = mapy[idx]; - - uchar3 out = make_uchar3(0, 0, 0); + const float xcoo = mapx.ptr(y)[x]; + const float ycoo = mapy.ptr(y)[x]; - if (xcoo >= 0 && xcoo < width - 1 && ycoo >= 0 && ycoo < height - 1) - { - const int x1 = __float2int_rd(xcoo); - const int y1 = __float2int_rd(ycoo); - const int x2 = x1 + 1; - const int y2 = y1 + 1; - - uchar src_reg = *(src + y1 * src_step + 3 * x1); - out.x += src_reg * (x2 - xcoo) * (y2 - ycoo); - src_reg = *(src + y1 * src_step + 3 * x1 + 1); - out.y += src_reg * (x2 - xcoo) * (y2 - ycoo); - src_reg = *(src + y1 * src_step + 3 * x1 + 2); - out.z += src_reg * (x2 - xcoo) * (y2 - ycoo); - - src_reg = *(src + y1 * src_step + 3 * x2); - out.x += src_reg * (xcoo - x1) * (y2 - ycoo); - src_reg = *(src + y1 * src_step + 3 * x2 + 1); - out.y += src_reg * (xcoo - x1) * (y2 - ycoo); - src_reg = *(src + y1 * src_step + 3 * x2 + 2); - out.z += src_reg * (xcoo - x1) * (y2 - ycoo); - - src_reg = *(src + y2 * src_step + 3 * x1); - out.x += src_reg * (x2 - xcoo) * (ycoo - y1); - src_reg = *(src + y2 * src_step + 3 * x1 + 1); - out.y += src_reg * (x2 - xcoo) * (ycoo - y1); - src_reg = *(src + y2 * src_step + 3 * x1 + 2); - out.z += src_reg * (x2 - xcoo) * (ycoo - y1); - - src_reg = *(src + y2 * src_step + 3 * x2); - out.x += src_reg * (xcoo - x1) * (ycoo - y1); - src_reg = *(src + y2 * src_step + 3 * x2 + 1); - out.y += src_reg * (xcoo - x1) * (ycoo - y1); - src_reg = *(src + y2 * src_step + 3 * x2 + 2); - out.z += src_reg * (xcoo - x1) * (ycoo - y1); - } - - /**(uchar3*)(dst + y * dst_step + 3 * x) = out;*/ - *(dst + y * dst_step + 3 * x) = out.x; - *(dst + y * dst_step + 3 * x + 1) = out.y; - *(dst + y * dst_step + 3 * x + 2) = out.z; + dst.ptr(y)[x] = saturate_cast(src(ycoo, xcoo)); } } - void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst) + template