\r
template <typename T> struct DevMem2D_\r
{ \r
+ typedef T elem_type;\r
+ typedef int index_type;\r
+\r
int cols;\r
int rows;\r
T* data;\r
template <typename U> \r
explicit DevMem2D_(const DevMem2D_<U>& d)\r
: cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {}\r
- \r
- typedef T elem_type;\r
+\r
enum { elem_size = sizeof(elem_type) };\r
\r
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }\r
\r
__CV_GPU_HOST_DEVICE__ operator T*() const { return data; }\r
\r
+ __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }\r
+ __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }\r
+\r
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) \r
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }\r
thrust::device_ptr<T> end() const { return thrust::device_ptr<T>(data) + cols * rows; }\r
\r
template<typename T> struct PtrStep_\r
{\r
+ typedef T elem_type;\r
+ typedef int index_type;\r
+\r
T* data;\r
size_t step;\r
\r
PtrStep_() : data(0), step(0) {} \r
PtrStep_(const DevMem2D_<T>& mem) : data(mem.data), step(mem.step) {}\r
\r
- typedef T elem_type;\r
enum { elem_size = sizeof(elem_type) };\r
\r
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }\r
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); }\r
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); }\r
\r
+ __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }\r
+ __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }\r
+\r
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) \r
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }\r
#endif\r
PtrStep_<T>::step /= PtrStep_<T>::elem_size; \r
}\r
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_<T>::data + y * PtrStep_<T>::step; }\r
- __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_<T>::data + y * PtrStep_<T>::step; } \r
+ __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_<T>::data + y * PtrStep_<T>::step; } \r
+\r
+ __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }\r
+ __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; } \r
};\r
\r
typedef DevMem2D_<unsigned char> DevMem2D;\r
////////////////////////////// Image processing //////////////////////////////\r
\r
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.\r
- //! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type\r
- CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap);\r
+ //! supports CV_32FC1 map type\r
+ CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,\r
+ int interpolation, int borderMode = BORDER_CONSTANT, const Scalar& borderValue = Scalar());\r
\r
//! Does mean shift filtering on GPU.\r
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,\r
CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null());\r
\r
//! smoothes the source image and downsamples it\r
- CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());\r
+ CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());\r
\r
//! upsamples the source image and then smoothes it\r
- CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());\r
+ CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());\r
\r
//! performs linear blending of two images\r
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero\r
{\r
const T* srcCol = src.ptr() + x;\r
\r
- sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol);\r
- sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol);\r
- sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol);\r
+ sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol, src.step);\r
+ sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol, src.step);\r
+ sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol, src.step);\r
\r
__syncthreads();\r
\r
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
\r
- B<T> b(src.rows, src.step);\r
+ B<T> b(src.rows);\r
\r
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))\r
{\r
#include "opencv2/gpu/device/border_interpolate.hpp"\r
#include "opencv2/gpu/device/vec_traits.hpp"\r
#include "opencv2/gpu/device/vec_math.hpp"\r
+#include "opencv2/gpu/device/saturate_cast.hpp"\r
+#include "opencv2/gpu/device/utility.hpp"\r
\r
using namespace cv::gpu;\r
using namespace cv::gpu::device;\r
/////////////////////////////////// Remap ///////////////////////////////////////////////\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap);\r
+ // cudaAddressModeClamp == BrdReplicate\r
+ /*texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_remap_uchar_LinearFilter(0, cudaFilterModeLinear, cudaAddressModeClamp);\r
\r
- __global__ void remap_1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int height)\r
+ __global__ void remap_uchar_LinearFilter(const PtrStepf mapx, const PtrStepf mapy, DevMem2D dst)\r
{ \r
- int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- int y = blockDim.y * blockIdx.y + threadIdx.y;\r
- if (x < width && y < height)\r
- {\r
- int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
- float xcoo = mapx[idx];\r
- float ycoo = mapy[idx];\r
+ if (x < dst.cols && y < dst.rows)\r
+ {\r
+ const float xcoo = mapx.ptr(y)[x];\r
+ const float ycoo = mapy.ptr(y)[x];\r
\r
- out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)); \r
+ dst.ptr(y)[x] = 255.0f * tex2D(tex_remap_uchar_LinearFilter, xcoo, ycoo); \r
}\r
- }\r
+ }*/\r
\r
- __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy,\r
- size_t map_step, uchar* dst, size_t dst_step, int width, int height)\r
- { \r
+ template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)\r
+ {\r
const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
- if (x < width && y < height)\r
+ if (x < dst.cols && y < dst.rows)\r
{\r
- const int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/\r
-\r
- const float xcoo = mapx[idx];\r
- const float ycoo = mapy[idx];\r
- \r
- uchar3 out = make_uchar3(0, 0, 0);\r
+ const float xcoo = mapx.ptr(y)[x];\r
+ const float ycoo = mapy.ptr(y)[x];\r
\r
- if (xcoo >= 0 && xcoo < width - 1 && ycoo >= 0 && ycoo < height - 1)\r
- {\r
- const int x1 = __float2int_rd(xcoo);\r
- const int y1 = __float2int_rd(ycoo);\r
- const int x2 = x1 + 1;\r
- const int y2 = y1 + 1;\r
- \r
- uchar src_reg = *(src + y1 * src_step + 3 * x1);\r
- out.x += src_reg * (x2 - xcoo) * (y2 - ycoo);\r
- src_reg = *(src + y1 * src_step + 3 * x1 + 1);\r
- out.y += src_reg * (x2 - xcoo) * (y2 - ycoo);\r
- src_reg = *(src + y1 * src_step + 3 * x1 + 2);\r
- out.z += src_reg * (x2 - xcoo) * (y2 - ycoo);\r
-\r
- src_reg = *(src + y1 * src_step + 3 * x2); \r
- out.x += src_reg * (xcoo - x1) * (y2 - ycoo);\r
- src_reg = *(src + y1 * src_step + 3 * x2 + 1); \r
- out.y += src_reg * (xcoo - x1) * (y2 - ycoo);\r
- src_reg = *(src + y1 * src_step + 3 * x2 + 2); \r
- out.z += src_reg * (xcoo - x1) * (y2 - ycoo);\r
-\r
- src_reg = *(src + y2 * src_step + 3 * x1); \r
- out.x += src_reg * (x2 - xcoo) * (ycoo - y1);\r
- src_reg = *(src + y2 * src_step + 3 * x1 + 1); \r
- out.y += src_reg * (x2 - xcoo) * (ycoo - y1);\r
- src_reg = *(src + y2 * src_step + 3 * x1 + 2); \r
- out.z += src_reg * (x2 - xcoo) * (ycoo - y1);\r
-\r
- src_reg = *(src + y2 * src_step + 3 * x2); \r
- out.x += src_reg * (xcoo - x1) * (ycoo - y1);\r
- src_reg = *(src + y2 * src_step + 3 * x2 + 1); \r
- out.y += src_reg * (xcoo - x1) * (ycoo - y1);\r
- src_reg = *(src + y2 * src_step + 3 * x2 + 2); \r
- out.z += src_reg * (xcoo - x1) * (ycoo - y1);\r
- }\r
-\r
- /**(uchar3*)(dst + y * dst_step + 3 * x) = out;*/\r
- *(dst + y * dst_step + 3 * x) = out.x;\r
- *(dst + y * dst_step + 3 * x + 1) = out.y;\r
- *(dst + y * dst_step + 3 * x + 2) = out.z;\r
+ dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));\r
}\r
}\r
\r
- void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)\r
+ template <template <typename> class Filter, template <typename> class B, typename T> \r
+ void remap_caller(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, T borderValue)\r
{\r
- dim3 threads(16, 16, 1);\r
- dim3 grid(1, 1, 1);\r
- grid.x = divUp(dst.cols, threads.x);\r
- grid.y = divUp(dst.rows, threads.y);\r
+ dim3 block(32, 8);\r
+ dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
\r
- TextureBinder tex(&tex_remap, src);\r
+ B<T> brd(src.rows, src.cols, borderValue);\r
+ BorderReader< PtrStep_<T>, B<T> > brd_src(src, brd);\r
+ Filter< BorderReader< PtrStep_<T>, B<T> > > filter_src(brd_src);\r
\r
- remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);\r
+ remap<<<grid, block>>>(filter_src, mapx, mapy, dst);\r
cudaSafeCall( cudaGetLastError() );\r
\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
- \r
- void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)\r
+\r
+#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type, filter) \\r
+ template <> void remap_caller<filter, BrdReplicate>(const DevMem2D_<type>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<type>& dst, type) \\r
+ { \\r
+ const dim3 block(16, 16); \\r
+ const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
+ TextureBinder tex(&tex_remap_ ## type ## _ ## filter ## , src); \\r
+ remap_ ## type ## _ ## filter ## <<<grid, block>>>(mapx, mapy, dst); \\r
+ cudaSafeCall( cudaGetLastError() ); \\r
+ cudaSafeCall( cudaDeviceSynchronize() ); \\r
+ }\r
+\r
+ //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar, LinearFilter)\r
+\r
+#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX\r
+\r
+ template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
+ int interpolation, int borderMode, const double borderValue[4])\r
{\r
- dim3 threads(32, 8, 1);\r
- dim3 grid(1, 1, 1);\r
- grid.x = divUp(dst.cols, threads.x);\r
- grid.y = divUp(dst.rows, threads.y);\r
+ typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, T borderValue);\r
\r
- remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);\r
- cudaSafeCall( cudaGetLastError() );\r
+ static const caller_t callers[2][3] = \r
+ {\r
+ { remap_caller<PointFilter, BrdReflect101>, remap_caller<PointFilter, BrdReplicate>, remap_caller<PointFilter, BrdConstant> },\r
+ { remap_caller<LinearFilter, BrdReflect101>, remap_caller<LinearFilter, BrdReplicate>, remap_caller<LinearFilter, BrdConstant> }\r
+ };\r
\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
+ typename VecTraits<T>::elem_type brd[] = {(typename VecTraits<T>::elem_type)borderValue[0], (typename VecTraits<T>::elem_type)borderValue[1], (typename VecTraits<T>::elem_type)borderValue[2], (typename VecTraits<T>::elem_type)borderValue[3]};\r
+\r
+ callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), VecTraits<T>::make(brd));\r
}\r
\r
+ template void remap_gpu<uchar >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uchar2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uchar3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uchar4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<schar>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<char2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<char3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<char4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<ushort >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<ushort2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<ushort3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<ushort4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<short >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<short2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<short3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<short4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<uint >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uint2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uint3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<uint4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<int >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<int2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<int3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<int4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ \r
+ template void remap_gpu<float >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<float2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+ template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);\r
+\r
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
\r
texture<uchar4, 2> tex_meanshift;\r
}\r
}\r
\r
- template <typename B>\r
+ template <typename BR, typename BC>\r
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,\r
- PtrStep dst, B border_row, B border_col)\r
+ PtrStep dst, BR border_row, BC border_col)\r
{\r
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;\r
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;\r
\r
for (int i = ibegin; i < iend; ++i)\r
{\r
- int y = border_col.idx(i);\r
+ int y = border_col.idx_row(i);\r
for (int j = jbegin; j < jend; ++j)\r
{\r
- int x = border_row.idx(j);\r
+ int x = border_row.idx_col(j);\r
float dx = tex2D(harrisDxTex, x, y);\r
float dy = tex2D(harrisDyTex, x, y);\r
a += dx * dx;\r
{\r
case BORDER_REFLECT101_GPU:\r
cornerHarris_kernel<<<grid, threads>>>(\r
- cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows));\r
+ cols, rows, block_size, k, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));\r
break;\r
case BORDER_REPLICATE_GPU:\r
harrisDxTex.addressMode[0] = cudaAddressModeClamp;\r
}\r
\r
\r
- template <typename B>\r
+ template <typename BR, typename BC>\r
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, \r
- PtrStep dst, B border_row, B border_col)\r
+ PtrStep dst, BR border_row, BC border_col)\r
{\r
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;\r
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;\r
\r
for (int i = ibegin; i < iend; ++i)\r
{\r
- int y = border_col.idx(i);\r
+ int y = border_col.idx_row(i);\r
for (int j = jbegin; j < jend; ++j)\r
{\r
- int x = border_row.idx(j);\r
+ int x = border_row.idx_col(j);\r
float dx = tex2D(minEigenValDxTex, x, y);\r
float dy = tex2D(minEigenValDyTex, x, y);\r
a += dx * dx;\r
{\r
case BORDER_REFLECT101_GPU:\r
cornerMinEigenVal_kernel<<<grid, threads>>>(\r
- cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows));\r
+ cols, rows, block_size, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));\r
break;\r
case BORDER_REPLICATE_GPU:\r
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;\r
//////////////////////////////////////////////////////////////////////////\r
// pyrDown\r
\r
- template <typename T> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd, int dst_cols)\r
+ template <typename T, typename B> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const B b, int dst_cols)\r
{\r
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;\r
\r
\r
sum = VecTraits<value_type>::all(0);\r
\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(x)];\r
- sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(x)];\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(x)];\r
+ sum = sum + 0.0625f * b.at(src_y - 2, x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y - 1, x, src.data, src.step);\r
+ sum = sum + 0.375f * b.at(src_y , x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y + 1, x, src.data, src.step);\r
+ sum = sum + 0.0625f * b.at(src_y + 2, x, src.data, src.step);\r
\r
smem[2 + threadIdx.x] = sum;\r
\r
\r
sum = VecTraits<value_type>::all(0);\r
\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(left_x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(left_x)];\r
- sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(left_x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(left_x)];\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(left_x)];\r
+ sum = sum + 0.0625f * b.at(src_y - 2, left_x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y - 1, left_x, src.data, src.step);\r
+ sum = sum + 0.375f * b.at(src_y , left_x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y + 1, left_x, src.data, src.step);\r
+ sum = sum + 0.0625f * b.at(src_y + 2, left_x, src.data, src.step);\r
\r
smem[threadIdx.x] = sum;\r
}\r
\r
sum = VecTraits<value_type>::all(0);\r
\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(right_x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(right_x)];\r
- sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(right_x)];\r
- sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(right_x)];\r
- sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(right_x)];\r
+ sum = sum + 0.0625f * b.at(src_y - 2, right_x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y - 1, right_x, src.data, src.step);\r
+ sum = sum + 0.375f * b.at(src_y , right_x, src.data, src.step);\r
+ sum = sum + 0.25f * b.at(src_y + 1, right_x, src.data, src.step);\r
+ sum = sum + 0.0625f * b.at(src_y + 2, right_x, src.data, src.step);\r
\r
smem[4 + threadIdx.x] = sum;\r
}\r
}\r
}\r
\r
- template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
+ template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)\r
{\r
const dim3 block(256);\r
const dim3 grid(divUp(src.cols, block.x), dst.rows);\r
\r
- BrdReflect101 rowBrd(src.cols);\r
- BrdReflect101 colBrd(src.rows);\r
+ B<T> b(src.rows, src.cols);\r
\r
- pyrDown<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(\r
- static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src), \r
- static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst), \r
- rowBrd, colBrd, dst.cols);\r
+ pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);\r
cudaSafeCall( cudaGetLastError() );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
- template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)\r
+ {\r
+ typedef typename TypeVec<T, cn>::vec_type type;\r
+\r
+ typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ pyrDown_caller<type, BrdReflect101>, pyrDown_caller<type, BrdReplicate>, pyrDown_caller<type, BrdConstant>\r
+ };\r
+\r
+ callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);\r
+ }\r
+\r
+ template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
- template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
- template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
- template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
- template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
- template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
//////////////////////////////////////////////////////////////////////////\r
// pyrUp\r
\r
- template <typename T> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd)\r
+ template <typename T, typename B> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const B b)\r
{\r
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;\r
\r
value_type sum;\r
\r
if (threadIdx.x < 10 && threadIdx.y < 10)\r
- smem1[threadIdx.y][threadIdx.x] = src.ptr(colBrd.idx(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1))[rowBrd.idx(blockIdx.x * blockDim.x / 2 + threadIdx.x - 1)];\r
+ smem1[threadIdx.y][threadIdx.x] = b.at(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1, blockIdx.x * blockDim.x / 2 + threadIdx.x - 1, src.data, src.step);\r
\r
__syncthreads();\r
\r
dst.ptr(y)[x] = saturate_cast<T>(4.0f * sum);\r
}\r
\r
- template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
+ template <typename T, template <typename> class B> void pyrUp_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)\r
{\r
const dim3 block(16, 16);\r
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
\r
- BrdReflect101 rowBrd(src.cols);\r
- BrdReflect101 colBrd(src.rows);\r
+ B<T> b(src.rows, src.cols);\r
\r
- pyrUp<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(\r
- static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src), \r
- static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst), \r
- rowBrd, colBrd);\r
+ pyrUp<T><<<grid, block, 0, stream>>>(src, dst, b);\r
cudaSafeCall( cudaGetLastError() );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
- template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
-\r
- template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
-\r
- template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
-\r
- template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
-\r
- template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
-\r
- template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
- template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)\r
+ {\r
+ typedef typename TypeVec<T, cn>::vec_type type;\r
+\r
+ typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ pyrUp_caller<type, BrdReflect101>, pyrUp_caller<type, BrdReplicate>, pyrUp_caller<type, BrdConstant>\r
+ };\r
+\r
+ callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);\r
+ }\r
+\r
+ template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+\r
+ template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+\r
+ template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+\r
+ template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+\r
+ template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+\r
+ template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
+ template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
//////////////////////////////////////////////////////////////////////////\r
// buildWarpMaps\r
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f\r
};\r
\r
- __device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)\r
+ struct WinReader\r
{\r
- float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;\r
- float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;\r
+ typedef uchar elem_type;\r
\r
- return tex2D(imgTex, pixel_x, pixel_y);\r
- }\r
-\r
- __device__ unsigned char calcPATCH(int i1, int j1, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir, int win_size)\r
- {\r
- /* Scale the window to size PATCH_SZ so each pixel's size is s. This\r
- makes calculating the gradients with wavelets of size 2s easy */\r
- const float icoo = ((float)i1 / (PATCH_SZ + 1)) * win_size;\r
- const float jcoo = ((float)j1 / (PATCH_SZ + 1)) * win_size;\r
+ __device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) : \r
+ centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)\r
+ {\r
+ }\r
\r
- const int i = __float2int_rd(icoo);\r
- const int j = __float2int_rd(jcoo);\r
+ __device__ __forceinline__ uchar operator ()(int i, int j) const\r
+ {\r
+ float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;\r
+ float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;\r
\r
- float res = calcWin(i, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (j + 1 - jcoo);\r
- res += calcWin(i + 1, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (j + 1 - jcoo);\r
- res += calcWin(i + 1, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (jcoo - j);\r
- res += calcWin(i, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (jcoo - j);\r
+ return tex2D(imgTex, pixel_x, pixel_y);\r
+ }\r
\r
- return saturate_cast<unsigned char>(res);\r
- } \r
+ float centerX; \r
+ float centerY;\r
+ float win_offset; \r
+ float cos_dir; \r
+ float sin_dir;\r
+ };\r
\r
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], \r
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)\r
const int xIndex = xBlock * 5 + threadIdx.x;\r
const int yIndex = yBlock * 5 + threadIdx.y;\r
\r
- s_PATCH[threadIdx.y][threadIdx.x] = calcPATCH(yIndex, xIndex, centerX, centerY, win_offset, cos_dir, sin_dir, win_size);\r
+ const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;\r
+ const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;\r
+\r
+ LinearFilter<WinReader> filter(WinReader(centerX, centerY, win_offset, cos_dir, sin_dir));\r
+\r
+ s_PATCH[threadIdx.y][threadIdx.x] = filter(icoo, jcoo);\r
+\r
__syncthreads();\r
\r
if (threadIdx.x < 5 && threadIdx.y < 5)\r
if( refcount && CV_XADD(refcount, -1) == 1 )\r
{\r
fastFree(refcount);\r
- cudaSafeCall( cudaFree(datastart) );\r
+ cudaFree(datastart);\r
}\r
data = datastart = dataend = 0;\r
step = rows = cols = 0;\r
\r
#if !defined (HAVE_CUDA)\r
\r
-void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&){ throw_nogpu(); }\r
+void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&){ throw_nogpu(); }\r
void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
-void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
-void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
+void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
+////////////////////////////////////////////////////////////////////////\r
+// remap\r
+\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
- void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
-\r
- extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
- extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
+ template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
+ int interpolation, int borderMode, const double borderValue[4]);\r
+}}}\r
\r
- void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
- void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
+void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue)\r
+{\r
+ using namespace cv::gpu::imgproc;\r
\r
- void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
- void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
-}}}\r
+ typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);;\r
+ static const caller_t callers[6][4] = \r
+ {\r
+ {remap_gpu<uchar>, remap_gpu<uchar2>, remap_gpu<uchar3>, remap_gpu<uchar4>},\r
+ {remap_gpu<schar>, remap_gpu<char2>, remap_gpu<char3>, remap_gpu<char4>},\r
+ {remap_gpu<ushort>, remap_gpu<ushort2>, remap_gpu<ushort3>, remap_gpu<ushort4>},\r
+ {remap_gpu<short>, remap_gpu<short2>, remap_gpu<short3>, remap_gpu<short4>},\r
+ {remap_gpu<int>, remap_gpu<int2>, remap_gpu<int3>, remap_gpu<int4>},\r
+ {remap_gpu<float>, remap_gpu<float2>, remap_gpu<float3>, remap_gpu<float4>}\r
+ };\r
\r
-////////////////////////////////////////////////////////////////////////\r
-// remap\r
+ CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
+ CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());\r
\r
-void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)\r
-{\r
- typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
- static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};\r
+ CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);\r
\r
- CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);\r
+ CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT);\r
+ int gpuBorderType;\r
+ CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));\r
\r
dst.create(xmap.size(), src.type());\r
\r
- callers[src.channels() - 1](src, xmap, ymap, dst);\r
+ callers[src.depth()][src.channels() - 1](src, xmap, ymap, dst, interpolation, gpuBorderType, borderValue.val);\r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
// meanShiftFiltering_GPU\r
\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);\r
+}}}\r
+\r
void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)\r
{\r
if( src.empty() )\r
////////////////////////////////////////////////////////////////////////\r
// meanShiftProc_GPU\r
\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);\r
+}}}\r
+\r
void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)\r
{\r
if( src.empty() )\r
////////////////////////////////////////////////////////////////////////\r
// drawColorDisp\r
\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
+ void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);\r
+}}}\r
+\r
namespace\r
{\r
template <typename T>\r
////////////////////////////////////////////////////////////////////////\r
// reprojectImageTo3D\r
\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
+ void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);\r
+}}}\r
+\r
namespace\r
{\r
template <typename T>\r
\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
}}}\r
\r
-void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)\r
+void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
{\r
using namespace cv::gpu::imgproc;\r
\r
- typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
static const func_t funcs[6][4] = \r
{\r
\r
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
\r
+ CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+ int gpuBorderType;\r
+ CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
+\r
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
\r
- funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));\r
+ funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
}\r
\r
\r
\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
}}}\r
\r
-void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)\r
+void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)\r
{\r
using namespace cv::gpu::imgproc;\r
\r
- typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);\r
+ typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);\r
\r
static const func_t funcs[6][4] = \r
{\r
\r
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
\r
+ CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+ int gpuBorderType;\r
+ CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
+\r
dst.create(src.rows*2, src.cols*2, src.type());\r
\r
- funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));\r
+ funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));\r
}\r
\r
\r
\r
namespace cv { namespace gpu { namespace device\r
{\r
- struct BrdReflect101 \r
+ //////////////////////////////////////////////////////////////\r
+ // BrdConstant\r
+\r
+ template <typename D> struct BrdRowConstant\r
{\r
- explicit __host__ __device__ __forceinline__ BrdReflect101(int len): last(len - 1) {}\r
+ typedef D result_type;\r
+\r
+ explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}\r
\r
- __device__ __forceinline__ int idx_low(int i) const\r
+ template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const \r
{\r
- return abs(i);\r
+ return x >= 0 ? saturate_cast<D>(data[x]) : val;\r
}\r
\r
- __device__ __forceinline__ int idx_high(int i) const \r
+ template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const \r
{\r
- return last - abs(last - i);\r
+ return x < width ? saturate_cast<D>(data[x]) : val;\r
}\r
\r
- __device__ __forceinline__ int idx(int i) const\r
+ template <typename T> __device__ __forceinline__ D at(int x, const T* data) const \r
{\r
- return idx_low(idx_high(i));\r
+ return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;\r
}\r
\r
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
{\r
- return -last <= mini && maxi <= 2 * last;\r
+ return true;\r
}\r
\r
- const int last;\r
+ const int width;\r
+ const D val;\r
};\r
\r
- template <typename D> struct BrdRowReflect101 : BrdReflect101\r
+ template <typename D> struct BrdColConstant\r
{\r
- explicit __host__ __device__ __forceinline__ BrdRowReflect101(int len): BrdReflect101(len) {}\r
+ typedef D result_type;\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}\r
+\r
+ template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const \r
{\r
- return saturate_cast<D>(data[idx_low(i)]);\r
+ return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const \r
{\r
- return saturate_cast<D>(data[idx_high(i)]);\r
+ return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;\r
}\r
+\r
+ template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const \r
+ {\r
+ return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;\r
+ }\r
+\r
+ __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return true;\r
+ }\r
+\r
+ const int height;\r
+ const D val;\r
};\r
\r
- template <typename D> struct BrdColReflect101 : BrdReflect101\r
+ template <typename D> struct BrdConstant\r
{\r
- __host__ __device__ __forceinline__ BrdColReflect101(int len, size_t step): BrdReflect101(len), step(step) {}\r
+ typedef D result_type;\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : \r
+ height(height_), width(width_), val(val_) \r
{\r
- return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const\r
{\r
- return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));\r
+ return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;\r
}\r
\r
- const size_t step;\r
+ template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const\r
+ {\r
+ return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;\r
+ }\r
+\r
+ const int height;\r
+ const int width;\r
+ const D val;\r
};\r
\r
- struct BrdReplicate\r
+ //////////////////////////////////////////////////////////////\r
+ // BrdReplicate\r
+\r
+ template <typename D> struct BrdRowReplicate\r
{\r
- explicit __host__ __device__ __forceinline__ BrdReplicate(int len): last(len - 1) {}\r
+ typedef D result_type;\r
+\r
+ explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}\r
+ template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}\r
+\r
+ __device__ __forceinline__ int idx_col_low(int x) const\r
+ {\r
+ return ::max(x, 0);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col_high(int x) const \r
+ {\r
+ return ::min(x, last_col);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col(int x) const\r
+ {\r
+ return idx_col_low(idx_col_high(x));\r
+ }\r
\r
- __device__ __forceinline__ int idx_low(int i) const\r
+ template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const \r
{\r
- return ::max(i, 0);\r
+ return saturate_cast<D>(data[idx_col_low(x)]);\r
}\r
\r
- __device__ __forceinline__ int idx_high(int i) const \r
+ template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const \r
{\r
- return ::min(i, last);\r
+ return saturate_cast<D>(data[idx_col_high(x)]);\r
}\r
\r
- __device__ __forceinline__ int idx(int i) const\r
+ template <typename T> __device__ __forceinline__ D at(int x, const T* data) const \r
{\r
- return idx_low(idx_high(i));\r
+ return saturate_cast<D>(data[idx_col(x)]);\r
}\r
\r
bool is_range_safe(int mini, int maxi) const \r
return true;\r
}\r
\r
- const int last;\r
+ const int last_col;\r
};\r
\r
- template <typename D> struct BrdRowReplicate : BrdReplicate\r
+ template <typename D> struct BrdColReplicate\r
{\r
- explicit __host__ __device__ __forceinline__ BrdRowReplicate(int len): BrdReplicate(len) {}\r
+ typedef D result_type;\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}\r
+ template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}\r
+\r
+ __device__ __forceinline__ int idx_row_low(int y) const\r
{\r
- return saturate_cast<D>(data[idx_low(i)]);\r
+ return ::max(y, 0);\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ __device__ __forceinline__ int idx_row_high(int y) const \r
{\r
- return saturate_cast<D>(data[idx_high(i)]);\r
+ return ::min(y, last_row);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row(int y) const\r
+ {\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));\r
}\r
- };\r
\r
+ template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));\r
+ }\r
\r
- template <typename D> struct BrdColReplicate : BrdReplicate\r
+ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return true;\r
+ }\r
+\r
+ const int last_row;\r
+ };\r
+\r
+ template <typename D> struct BrdReplicate\r
{\r
- __host__ __device__ __forceinline__ BrdColReplicate(int len, size_t step): BrdReplicate(len), step(step) {}\r
+ typedef D result_type;\r
+\r
+ __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : \r
+ last_row(height - 1), last_col(width - 1) \r
+ {\r
+ }\r
+ template <typename U> \r
+ __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : \r
+ last_row(height - 1), last_col(width - 1) \r
+ {\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row_low(int y) const\r
+ {\r
+ return ::max(y, 0);\r
+ }\r
+ __device__ __forceinline__ float idx_row_low(float y) const\r
+ {\r
+ return ::fmax(y, 0.0f);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row_high(int y) const \r
+ {\r
+ return ::min(y, last_row);\r
+ }\r
+ __device__ __forceinline__ float idx_row_high(float y) const \r
+ {\r
+ return ::fmin(y, last_row);\r
+ }\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ __device__ __forceinline__ int idx_row(int y) const\r
+ {\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+ __device__ __forceinline__ float idx_row(float y) const\r
+ {\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col_low(int x) const\r
+ {\r
+ return ::max(x, 0);\r
+ }\r
+ __device__ __forceinline__ float idx_col_low(float x) const\r
+ {\r
+ return ::fmax(x, 0);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col_high(int x) const \r
+ {\r
+ return ::min(x, last_col);\r
+ }\r
+ __device__ __forceinline__ float idx_col_high(float x) const \r
+ {\r
+ return ::fmin(x, last_col);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col(int x) const\r
+ {\r
+ return idx_col_low(idx_col_high(x));\r
+ }\r
+ __device__ __forceinline__ float idx_col(float x) const\r
{\r
- return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));\r
+ return idx_col_low(idx_col_high(x));\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const \r
{\r
- return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));\r
+ return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);\r
}\r
\r
- const size_t step;\r
+ template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const \r
+ {\r
+ return saturate_cast<D>(src(idx_row(y), idx_col(x)));\r
+ }\r
+\r
+ const int last_row;\r
+ const int last_col;\r
};\r
\r
- template <typename D> struct BrdRowConstant\r
+ //////////////////////////////////////////////////////////////\r
+ // BrdReflect101\r
+\r
+ template <typename D> struct BrdRowReflect101\r
{\r
- explicit __host__ __device__ __forceinline__ BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}\r
+ typedef D result_type;\r
+\r
+ explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}\r
+ template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ __device__ __forceinline__ int idx_col_low(int x) const\r
{\r
- return i >= 0 ? saturate_cast<D>(data[i]) : val;\r
+ return ::abs(x);\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ __device__ __forceinline__ int idx_col_high(int x) const \r
{\r
- return i < len ? saturate_cast<D>(data[i]) : val;\r
+ return last_col - ::abs(last_col - x);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col(int x) const\r
+ {\r
+ return idx_col_low(idx_col_high(x));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_col_low(x)]);\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_col_high(x)]);\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at(int x, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_col(x)]);\r
}\r
\r
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
{\r
- return true;\r
+ return -last_col <= mini && maxi <= 2 * last_col;\r
}\r
\r
- const int len;\r
- const D val;\r
+ const int last_col;\r
};\r
\r
- template <typename D> struct BrdColConstant\r
+ template <typename D> struct BrdColReflect101\r
{\r
- __host__ __device__ __forceinline__ BrdColConstant(int len_, size_t step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}\r
+ typedef D result_type;\r
\r
- template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const \r
+ explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}\r
+ template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}\r
+\r
+ __device__ __forceinline__ int idx_row_low(int y) const\r
+ {\r
+ return ::abs(y);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row_high(int y) const \r
{\r
- return i >= 0 ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;\r
+ return last_row - ::abs(last_row - y);\r
}\r
\r
- template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const \r
+ __device__ __forceinline__ int idx_row(int y) const\r
{\r
- return i < len ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));\r
}\r
\r
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
{\r
- return true;\r
+ return -last_row <= mini && maxi <= 2 * last_row;\r
}\r
\r
- const int len;\r
- const size_t step;\r
- const D val;\r
+ const int last_row;\r
};\r
\r
- template <typename OutT> struct BrdConstant\r
+ template <typename D> struct BrdReflect101\r
{\r
- __host__ __device__ __forceinline__ BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {}\r
+ typedef D result_type;\r
+\r
+ __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : \r
+ last_row(height - 1), last_col(width - 1) \r
+ {\r
+ }\r
+ template <typename U> \r
+ __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : \r
+ last_row(height - 1), last_col(width - 1) \r
+ {\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row_low(int y) const\r
+ {\r
+ return ::abs(y);\r
+ }\r
+ __device__ __forceinline__ float idx_row_low(float y) const\r
+ {\r
+ return ::fabs(y);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row_high(int y) const \r
+ {\r
+ return last_row - ::abs(last_row - y);\r
+ }\r
+ __device__ __forceinline__ float idx_row_high(float y) const \r
+ {\r
+ return last_row - ::fabs(last_row - y);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_row(int y) const\r
+ {\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+ __device__ __forceinline__ float idx_row(float y) const\r
+ {\r
+ return idx_row_low(idx_row_high(y));\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col_low(int x) const\r
+ {\r
+ return ::abs(x);\r
+ }\r
+ __device__ __forceinline__ float idx_col_low(float x) const\r
+ {\r
+ return ::fabs(x);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col_high(int x) const \r
+ {\r
+ return last_col - ::abs(last_col - x);\r
+ }\r
+ __device__ __forceinline__ float idx_col_high(float x) const \r
+ {\r
+ return last_col - ::fabs(last_col - x);\r
+ }\r
+\r
+ __device__ __forceinline__ int idx_col(int x) const\r
+ {\r
+ return idx_col_low(idx_col_high(x));\r
+ }\r
+ __device__ __forceinline__ float idx_col(float x) const\r
+ {\r
+ return idx_col_low(idx_col_high(x));\r
+ }\r
+\r
+ template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const \r
+ {\r
+ return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);\r
+ }\r
+\r
+ template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const \r
+ {\r
+ return saturate_cast<D>(src(idx_row(y), idx_col(x)));\r
+ }\r
+\r
+ const int last_row;\r
+ const int last_col;\r
+ };\r
+\r
+ //////////////////////////////////////////////////////////////\r
+ // BorderReader\r
+\r
+ template <typename Ptr2D, typename B> struct BorderReader\r
+ {\r
+ typedef typename B::result_type elem_type;\r
+ typedef typename Ptr2D::index_type index_type;\r
+\r
+ __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}\r
\r
- __device__ __forceinline__ OutT at(int x, int y, const uchar* data, int step) const\r
+ __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const\r
{\r
- if (x >= 0 && x <= w - 1 && y >= 0 && y <= h - 1)\r
- return ((const OutT*)(data + y * step))[x];\r
- return val;\r
+ return b.at(y, x, ptr);\r
}\r
\r
- const int w;\r
- const int h;\r
- OutT val;\r
+ const Ptr2D ptr;\r
+ const B b;\r
};\r
}}}\r
\r
\r
U vec1Vals[MAX_LEN / THREAD_DIM];\r
};\r
-\r
\r
///////////////////////////////////////////////////////////////////////////////\r
// Solve linear system\r
\r
return false;\r
}\r
+ \r
+ ///////////////////////////////////////////////////////////////////////////////\r
+ // Filters \r
+\r
+ template <typename Ptr2D> struct PointFilter\r
+ {\r
+ typedef typename Ptr2D::elem_type elem_type;\r
+ typedef float index_type;\r
+\r
+ explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}\r
+ \r
+ __device__ __forceinline__ elem_type operator ()(float y, float x) const\r
+ {\r
+ return src(__float2int_rn(y), __float2int_rn(x));\r
+ }\r
+\r
+ const Ptr2D src;\r
+ };\r
+\r
+ template <typename Ptr2D> struct LinearFilter\r
+ {\r
+ typedef typename Ptr2D::elem_type elem_type;\r
+ typedef float index_type;\r
+\r
+ explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}\r
+\r
+ __device__ __forceinline__ elem_type operator ()(float y, float x) const\r
+ {\r
+ typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;\r
+\r
+ work_type out = VecTraits<work_type>::all(0);\r
+\r
+ const int x1 = __float2int_rd(x);\r
+ const int y1 = __float2int_rd(y);\r
+ const int x2 = x1 + 1;\r
+ const int y2 = y1 + 1;\r
+\r
+ elem_type src_reg = src(y1, x1);\r
+ out = out + src_reg * ((x2 - x) * (y2 - y));\r
+\r
+ src_reg = src(y1, x2);\r
+ out = out + src_reg * ((x - x1) * (y2 - y));\r
+\r
+ src_reg = src(y2, x1);\r
+ out = out + src_reg * ((x2 - x) * (y - y1));\r
+\r
+ src_reg = src(y2, x2);\r
+ out = out + src_reg * ((x - x1) * (y - y1));\r
+\r
+ return saturate_cast<elem_type>(out);\r
+ }\r
+\r
+ const Ptr2D src;\r
+ };\r
}}}\r
\r
#endif // __OPENCV_GPU_UTILITY_HPP__\r
enum {cn=1}; \\r
static __device__ __host__ __forceinline__ type all(type v) {return v;} \\r
static __device__ __host__ __forceinline__ type make(type x) {return x;} \\r
+ static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \\r
}; \\r
template<> struct VecTraits<type ## 1> \\r
{ \\r
enum {cn=1}; \\r
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \\r
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \\r
+ static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \\r
}; \\r
template<> struct VecTraits<type ## 2> \\r
{ \\r
enum {cn=2}; \\r
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \\r
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \\r
+ static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \\r
}; \\r
template<> struct VecTraits<type ## 3> \\r
{ \\r
enum {cn=3}; \\r
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \\r
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \\r
+ static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \\r
}; \\r
template<> struct VecTraits<type ## 4> \\r
{ \\r
enum {cn=4}; \\r
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \\r
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \\r
+ static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \\r
}; \\r
template<> struct VecTraits<type ## 8> \\r
{ \\r
enum {cn=8}; \\r
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \\r
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \\r
+ static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \\r
};\r
\r
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)\r
- OPENCV_GPU_IMPLEMENT_VEC_TRAITS(char)\r
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)\r
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)\r
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)\r
\r
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS\r
\r
+ template<> struct VecTraits<char> \r
+ { \r
+ typedef char elem_type; \r
+ enum {cn=1}; \r
+ static __device__ __host__ __forceinline__ char all(char v) {return v;}\r
+ static __device__ __host__ __forceinline__ char make(char x) {return x;}\r
+ static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}\r
+ };\r
template<> struct VecTraits<schar> \r
{ \r
typedef schar elem_type; \r
enum {cn=1}; \r
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}\r
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}\r
+ static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}\r
+ };\r
+ template<> struct VecTraits<char1>\r
+ {\r
+ typedef schar elem_type;\r
+ enum {cn=1};\r
+ static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}\r
+ static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}\r
+ static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}\r
+ };\r
+ template<> struct VecTraits<char2>\r
+ {\r
+ typedef schar elem_type;\r
+ enum {cn=2};\r
+ static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}\r
+ static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}\r
+ static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}\r
+ };\r
+ template<> struct VecTraits<char3>\r
+ {\r
+ typedef schar elem_type;\r
+ enum {cn=3};\r
+ static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}\r
+ static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}\r
+ static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}\r
+ };\r
+ template<> struct VecTraits<char4>\r
+ {\r
+ typedef schar elem_type;\r
+ enum {cn=4};\r
+ static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}\r
+ static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}\r
+ static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}\r
+ };\r
+ template<> struct VecTraits<char8>\r
+ {\r
+ typedef schar elem_type;\r
+ enum {cn=8};\r
+ static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}\r
+ static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}\r
+ static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}\r
};\r
}}}\r
\r
///////////////////////////////////////////////////////////////////////////////////////////////////////\r
// remap\r
\r
-struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >\r
+struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >\r
{\r
cv::gpu::DeviceInfo devInfo;\r
int type;\r
+ int interpolation;\r
+ int borderType;\r
\r
cv::Size size;\r
cv::Mat src;\r
cv::Mat xmap;\r
cv::Mat ymap;\r
+ cv::Scalar borderValue;\r
\r
cv::Mat dst_gold;\r
\r
{\r
devInfo = std::tr1::get<0>(GetParam());\r
type = std::tr1::get<1>(GetParam());\r
+ interpolation = std::tr1::get<2>(GetParam());\r
+ borderType = std::tr1::get<3>(GetParam());\r
\r
cv::gpu::setDevice(devInfo.deviceID());\r
\r
cv::RNG& rng = cvtest::TS::ptr()->get_rng();\r
\r
- size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));\r
+ size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));\r
\r
- src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);\r
- xmap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.cols - 1, false);\r
- ymap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.rows - 1, false);\r
+ src = cvtest::randomMat(rng, size, type, 0.0, 256.0, false);\r
+\r
+ xmap.create(size, CV_32FC1);\r
+ ymap.create(size, CV_32FC1);\r
+\r
+ for (int y = 0; y < src.rows; ++y)\r
+ {\r
+ float* xmap_row = xmap.ptr<float>(y);\r
+ float* ymap_row = ymap.ptr<float>(y);\r
+\r
+ for (int x = 0; x < src.cols; ++x)\r
+ {\r
+ xmap_row[x] = src.cols - 1 - x;\r
+ ymap_row[x] = src.rows - 1 - y;\r
+ }\r
+ }\r
+\r
+ borderValue[0] = rng.uniform(0.0, 256.0);\r
+ borderValue[1] = rng.uniform(0.0, 256.0);\r
+ borderValue[2] = rng.uniform(0.0, 256.0);\r
+ borderValue[3] = rng.uniform(0.0, 256.0);\r
\r
- cv::remap(src, dst_gold, xmap, ymap, cv::INTER_LINEAR, cv::BORDER_WRAP);\r
+ cv::remap(src, dst_gold, xmap, ymap, interpolation, borderType, borderValue);\r
}\r
};\r
\r
TEST_P(Remap, Accuracy)\r
{\r
+ static const char* interpolations_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};\r
+ static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};\r
+\r
+ const char* interpolationStr = interpolations_str[interpolation];\r
+ const char* borderTypeStr = borderTypes_str[borderType];\r
+\r
PRINT_PARAM(devInfo);\r
PRINT_TYPE(type);\r
+ PRINT_PARAM(interpolationStr);\r
+ PRINT_PARAM(borderTypeStr);\r
PRINT_PARAM(size);\r
+ PRINT_PARAM(borderValue);\r
\r
cv::Mat dst;\r
\r
ASSERT_NO_THROW(\r
cv::gpu::GpuMat gpuRes;\r
\r
- cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap));\r
+ cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap), interpolation, borderType, borderValue);\r
\r
gpuRes.download(dst);\r
);\r
\r
- EXPECT_MAT_SIMILAR(dst_gold, dst, 0.5);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);\r
}\r
\r
-INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(\r
- testing::ValuesIn(devices()), \r
- testing::Values(CV_8UC1, CV_8UC3)));\r
+INSTANTIATE_TEST_CASE_P\r
+(\r
+ ImgProc, Remap, testing::Combine\r
+ (\r
+ testing::ValuesIn(devices()), \r
+ testing::Values\r
+ (\r
+ CV_8UC1, CV_8UC3, CV_8UC4,\r
+ CV_32FC1, CV_32FC3, CV_32FC4\r
+ ),\r
+ testing::Values(cv::INTER_NEAREST, cv::INTER_LINEAR),\r
+ testing::Values(cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT)\r
+ )\r
+);\r
\r
///////////////////////////////////////////////////////////////////////////////////////////////////////\r
// copyMakeBorder\r
Mat src, dst, xmap, ymap;\r
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;\r
\r
- for (int size = 1000; size <= 8000; size *= 2)\r
+ for (int size = 1000; size <= 4000; size *= 2)\r
{\r
- SUBTEST << "src " << size << " and 8U, 32F maps";\r
+ SUBTEST << "src " << size << ", 8UC1";\r
\r
gen(src, size, size, CV_8UC1, 0, 256);\r
\r
dst.create(xmap.size(), src.type());\r
\r
CPU_ON;\r
- remap(src, dst, xmap, ymap, INTER_LINEAR);\r
+ remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ CPU_OFF;\r
+\r
+ d_src = src;\r
+ d_xmap = xmap;\r
+ d_ymap = ymap;\r
+ d_dst.create(d_xmap.size(), d_src.type());\r
+\r
+ GPU_ON;\r
+ gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ GPU_OFF;\r
+ }\r
+\r
+ for (int size = 1000; size <= 4000; size *= 2)\r
+ {\r
+ SUBTEST << "src " << size << ", 8UC3";\r
+\r
+ gen(src, size, size, CV_8UC3, 0, 256);\r
+\r
+ xmap.create(size, size, CV_32F);\r
+ ymap.create(size, size, CV_32F);\r
+ for (int i = 0; i < size; ++i)\r
+ {\r
+ float* xmap_row = xmap.ptr<float>(i);\r
+ float* ymap_row = ymap.ptr<float>(i);\r
+ for (int j = 0; j < size; ++j)\r
+ {\r
+ xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;\r
+ ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;\r
+ }\r
+ }\r
+\r
+ dst.create(xmap.size(), src.type());\r
+\r
+ CPU_ON;\r
+ remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ CPU_OFF;\r
+\r
+ d_src = src;\r
+ d_xmap = xmap;\r
+ d_ymap = ymap;\r
+ d_dst.create(d_xmap.size(), d_src.type());\r
+\r
+ GPU_ON;\r
+ gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ GPU_OFF;\r
+ }\r
+\r
+ for (int size = 1000; size <= 4000; size *= 2)\r
+ {\r
+ SUBTEST << "src " << size << ", 8UC4";\r
+\r
+ gen(src, size, size, CV_8UC4, 0, 256);\r
+\r
+ xmap.create(size, size, CV_32F);\r
+ ymap.create(size, size, CV_32F);\r
+ for (int i = 0; i < size; ++i)\r
+ {\r
+ float* xmap_row = xmap.ptr<float>(i);\r
+ float* ymap_row = ymap.ptr<float>(i);\r
+ for (int j = 0; j < size; ++j)\r
+ {\r
+ xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;\r
+ ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;\r
+ }\r
+ }\r
+\r
+ dst.create(xmap.size(), src.type());\r
+\r
+ CPU_ON;\r
+ remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ CPU_OFF;\r
+\r
+ d_src = src;\r
+ d_xmap = xmap;\r
+ d_ymap = ymap;\r
+ d_dst.create(d_xmap.size(), d_src.type());\r
+\r
+ GPU_ON;\r
+ gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);\r
+ GPU_OFF;\r
+ }\r
+\r
+ for (int size = 1000; size <= 4000; size *= 2)\r
+ {\r
+ SUBTEST << "src " << size << ", 16SC3";\r
+\r
+ gen(src, size, size, CV_16SC3, 0, 256);\r
+\r
+ xmap.create(size, size, CV_32F);\r
+ ymap.create(size, size, CV_32F);\r
+ for (int i = 0; i < size; ++i)\r
+ {\r
+ float* xmap_row = xmap.ptr<float>(i);\r
+ float* ymap_row = ymap.ptr<float>(i);\r
+ for (int j = 0; j < size; ++j)\r
+ {\r
+ xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;\r
+ ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;\r
+ }\r
+ }\r
+\r
+ dst.create(xmap.size(), src.type());\r
+\r
+ CPU_ON;\r
+ remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);\r
CPU_OFF;\r
\r
d_src = src;\r
d_dst.create(d_xmap.size(), d_src.type());\r
\r
GPU_ON;\r
- gpu::remap(d_src, d_dst, d_xmap, d_ymap);\r
+ gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);\r
GPU_OFF;\r
}\r
}\r