//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, Stream& stream = Stream::Null());\r
\r
+ //! builds spherical warping maps\r
+ CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
+ GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null());\r
+\r
//! rotate 8bit single or four channel image\r
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
//! supports CV_8UC1, CV_8UC4 types\r
CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method);\r
\r
//! downsamples image\r
- CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst, int k=2);\r
+ CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst);\r
+\r
+ //! upsamples image\r
+ CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst);\r
+\r
+ //! smooths the source image and downsamples it\r
+ CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst);\r
+\r
+ //! upsamples the source image and then smooths it\r
+ CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst);\r
\r
//! performs linear blending of two images\r
//! to avoid accuracy errors, the sum of weights shouldn't be very close to zero\r
CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, \r
- GpuMat& result, Stream& stream = Stream::Null());\r
+ GpuMat& result, Stream& stream = Stream::Null()); \r
\r
////////////////////////////// Matrix reductions //////////////////////////////\r
\r
template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, int thresh, int maxVal, int type, cudaStream_t stream);\r
template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
template void threshold_gpu<double>(const DevMem2D& src, const DevMem2D& dst, double thresh, double maxVal, int type, cudaStream_t stream);\r
+\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // subtract\r
+\r
+ // Binary device functor handed to transform(): returns the left operand\r
+ // minus the right operand, converted back to T.\r
+ template <typename T>\r
+ struct SubtractOp\r
+ {\r
+     __device__ __forceinline__ T operator()(const T& lhs, const T& rhs) const\r
+     {\r
+         return lhs - rhs;\r
+     }\r
+ };\r
+\r
+ // Host-side entry point for element-wise subtraction: views the three\r
+ // untyped matrices as DevMem2D_<T> and passes them, together with a\r
+ // SubtractOp<T> functor and the caller's stream, to transform().\r
+ template <typename T>\r
+ void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)\r
+ {\r
+     const DevMem2D_<T> lhs(src1);\r
+     const DevMem2D_<T> rhs(src2);\r
+     const DevMem2D_<T> out(dst);\r
+\r
+     transform(lhs, rhs, out, SubtractOp<T>(), stream);\r
+ }\r
+\r
+ template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);\r
}}}\r
template void linearRowFilter_gpu<uchar4, float4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearRowFilter_gpu<short , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearRowFilter_gpu<short2, float2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
+ template void linearRowFilter_gpu<short3, float3>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearRowFilter_gpu<int , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearRowFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
}}}\r
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
\r
- B<T> b(src.rows, src.step / src.elemSize());\r
+ B<T> b(src.rows, src.step);\r
\r
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))\r
{\r
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearColumnFilter_gpu<float , short >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearColumnFilter_gpu<float2, short2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
+ template void linearColumnFilter_gpu<float3, short3>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearColumnFilter_gpu<float , int >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
template void linearColumnFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);\r
}}}\r
\r
#include "internal_shared.hpp"\r
\r
-#ifndef CV_PI_F\r
- #ifndef CV_PI\r
- #define CV_PI_F 3.14159265f\r
- #else\r
- #define CV_PI_F ((float)CV_PI)\r
- #endif\r
-#endif\r
-\r
// Other values are not supported\r
#define CELL_WIDTH 8\r
#define CELL_HEIGHT 8\r
void resize_8UC1(const DevMem2D& src, DevMem2D dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); } /* uchar variant: forwards to resize_for_hog with resize8UC1_tex */\r
void resize_8UC4(const DevMem2D& src, DevMem2D dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); } /* uchar4 variant: forwards to resize_for_hog with resize8UC4_tex */\r
\r
-}}}
\ No newline at end of file
+}}}\r
}\r
}\r
\r
- __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy, size_t map_step, \r
- uchar* dst, size_t dst_step, int width, int height)\r
+ __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy,\r
+ size_t map_step, uchar* dst, size_t dst_step, int width, int height)\r
{ \r
const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
grid.x = divUp(dst.cols, threads.x);\r
grid.y = divUp(dst.rows, threads.y);\r
\r
- tex_remap.filterMode = cudaFilterModeLinear; \r
+ tex_remap.filterMode = cudaFilterModeLinear;\r
tex_remap.addressMode[0] = tex_remap.addressMode[1] = cudaAddressModeWrap;\r
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();\r
cudaSafeCall( cudaBindTexture2D(0, tex_remap, src.data, desc, src.cols, src.rows, src.step) );\r
remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);\r
cudaSafeCall( cudaGetLastError() );\r
\r
- cudaSafeCall( cudaDeviceSynchronize() ); \r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
cudaSafeCall( cudaUnbindTexture(tex_remap) );\r
}\r
\r
grid.y = divUp(dst.rows, threads.y);\r
\r
remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);\r
- cudaSafeCall( cudaGetLastError() );\r
\r
- cudaSafeCall( cudaDeviceSynchronize() ); \r
+ cudaSafeCall( cudaGetLastError() );\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
+\r
//////////////////////////////////////////////////////////////////////////\r
// mulSpectrums\r
\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
+\r
//////////////////////////////////////////////////////////////////////////\r
// mulSpectrums_CONJ\r
\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
+\r
//////////////////////////////////////////////////////////////////////////\r
// mulAndScaleSpectrums\r
\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
+\r
//////////////////////////////////////////////////////////////////////////\r
// mulAndScaleSpectrums_CONJ\r
\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
+\r
/////////////////////////////////////////////////////////////////////////\r
// downsample\r
\r
- template <typename T>\r
- __global__ void downsampleKernel(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)\r
+ // Copies every second pixel of every second source row into dst.\r
+ // One thread writes one element of dst; x indexes interleaved channel\r
+ // elements, so it is split into a pixel index and a channel offset using cn.\r
+ template <typename T, int cn>\r
+ __global__ void downsampleKernel(const PtrStep_<T> src, DevMem2D_<T> dst)\r
{\r
int x = blockIdx.x * blockDim.x + threadIdx.x;\r
int y = blockIdx.y * blockDim.y + threadIdx.y;\r
\r
- if (x < cols && y < rows)\r
- dst.ptr(y)[x] = src.ptr(y * k)[x * k];\r
+ // Threads of the last partial block that fall outside dst do nothing.\r
+ if (x >= dst.cols || y >= dst.rows)\r
+     return;\r
+\r
+ const int pix = x / cn;          // destination pixel index\r
+ const int chan = x - pix * cn;   // channel within that pixel\r
+ dst.ptr(y)[x] = src.ptr(y * 2)[pix * 2 * cn + chan];\r
}\r
\r
\r
- template <typename T>\r
- void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)\r
+ // Launches downsampleKernel<T, cn> with one thread per element of dst and\r
+ // blocks until the device has finished.\r
+ template <typename T, int cn>\r
+ void downsampleCaller(const DevMem2D src, DevMem2D dst)\r
{\r
- dim3 threads(16, 16);\r
- dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));\r
+ const dim3 block(32, 8);\r
+ const dim3 blocks(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
\r
- downsampleKernel<<<grid, threads>>>(src, rows, cols, k, dst);\r
- cudaSafeCall( cudaGetLastError() );\r
+ downsampleKernel<T, cn><<<blocks, block>>>(DevMem2D_<T>(src), DevMem2D_<T>(dst));\r
+ cudaSafeCall(cudaGetLastError());        // launch-configuration errors\r
+ cudaSafeCall(cudaDeviceSynchronize());   // errors raised while the kernel ran\r
+ }\r
\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
+\r
+ template void downsampleCaller<uchar,1>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<uchar,2>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<uchar,3>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<uchar,4>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<short,1>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<short,2>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<short,3>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<short,4>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<float,1>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<float,2>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<float,3>(const DevMem2D src, DevMem2D dst);\r
+ template void downsampleCaller<float,4>(const DevMem2D src, DevMem2D dst);\r
+\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // upsample\r
+\r
+ // Writes dst element by element: positions whose pixel index and row are\r
+ // both even receive the matching source value (source pixel = index / 2,\r
+ // source row = y / 2); every other position is set to zero.\r
+ // x indexes interleaved channel elements; cn is the channel count.\r
+ template <typename T, int cn>\r
+ __global__ void upsampleKernel(const PtrStep_<T> src, DevMem2D_<T> dst)\r
+ {\r
+     const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
+     const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
+\r
+     if (x >= dst.cols || y >= dst.rows)\r
+         return;\r
+\r
+     const int pix = x / cn;          // destination pixel index\r
+     const int chan = x - pix * cn;   // channel within that pixel\r
+\r
+     T out = 0;\r
+     if (!(pix & 1) && !(y & 1))\r
+         out = src.ptr(y / 2)[pix / 2 * cn + chan];\r
+\r
+     dst.ptr(y)[x] = out;\r
+ }\r
+\r
+\r
+ // Launches upsampleKernel<T, cn> with one thread per element of dst and\r
+ // blocks until the device has finished.\r
+ template <typename T, int cn>\r
+ void upsampleCaller(const DevMem2D src, DevMem2D dst)\r
+ {\r
+     const dim3 block(32, 8);\r
+     const dim3 blocks(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+     upsampleKernel<T, cn><<<blocks, block>>>(DevMem2D_<T>(src), DevMem2D_<T>(dst));\r
+     cudaSafeCall(cudaGetLastError());        // launch-configuration errors\r
+     cudaSafeCall(cudaDeviceSynchronize());   // errors raised while the kernel ran\r
+ }\r
+\r
+\r
+ template void upsampleCaller<uchar,1>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<uchar,2>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<uchar,3>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<uchar,4>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<short,1>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<short,2>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<short,3>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<short,4>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<float,1>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<float,2>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<float,3>(const DevMem2D src, DevMem2D dst);\r
+ template void upsampleCaller<float,4>(const DevMem2D src, DevMem2D dst);\r
+\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // buildWarpMaps\r
+\r
+ namespace build_warp_maps /* __constant__ parameters filled by buildWarpSphericalMaps via cudaMemcpyToSymbol */\r
+ {\r
+ __constant__ float cr[9]; /* 9 floats copied from the `r` argument */\r
+ __constant__ float crinv[9]; /* 9 floats copied from `rinv`; SphericalMapper::mapBackward uses them as a row-major 3x3 matrix */\r
+ __constant__ float cf, cs; /* cf multiplies x/z and y/z in mapBackward; cs divides the incoming u and v */\r
+ __constant__ float chalf_w, chalf_h; /* offsets added to the mapped x and y */\r
+ }\r
+\r
+\r
+ // Backward mapper used as the Mapper policy of buildWarpMapsKernel.\r
+ // All parameters (cs, crinv, cf, chalf_w, chalf_h) are read from the\r
+ // __constant__ variables in namespace build_warp_maps.\r
+ class SphericalMapper\r
+ {\r
+ public:\r
+     // Converts destination coordinates (u, v) into source coordinates (x, y).\r
+     static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y)\r
+     {\r
+         using namespace build_warp_maps;\r
+\r
+         // Divide both coordinates by the scale to obtain the two angles.\r
+         const float phi = v / cs;\r
+         const float theta = u / cs;\r
+\r
+         // Direction vector built from the two angles.\r
+         const float sin_phi = sinf(phi);\r
+         const float dir_x = sin_phi * sinf(theta);\r
+         const float dir_y = -cosf(phi);\r
+         const float dir_z = sin_phi * cosf(theta);\r
+\r
+         // Multiply by the row-major 3x3 matrix crinv.\r
+         const float px = crinv[0]*dir_x + crinv[1]*dir_y + crinv[2]*dir_z;\r
+         const float py = crinv[3]*dir_x + crinv[4]*dir_y + crinv[5]*dir_z;\r
+         const float pz = crinv[6]*dir_x + crinv[7]*dir_y + crinv[8]*dir_z;\r
+\r
+         // Perspective divide, scale by cf and shift by the half extents.\r
+         x = cf*px/pz + chalf_w;\r
+         y = cf*py/pz + chalf_h;\r
+     }\r
+ };\r
+\r
+\r
+ // One thread per map element: converts the destination coordinate\r
+ // (tl_u + column, tl_v + row) with Mapper::mapBackward and stores the\r
+ // resulting source x / y into map_x / map_y at that element.\r
+ template <typename Mapper>\r
+ __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows,\r
+                                     PtrStepf map_x, PtrStepf map_y)\r
+ {\r
+     const int col = blockIdx.x * blockDim.x + threadIdx.x;\r
+     const int row = blockIdx.y * blockDim.y + threadIdx.y;\r
+\r
+     // Threads of the last partial block that fall outside the maps do nothing.\r
+     if (col >= cols || row >= rows)\r
+         return;\r
+\r
+     const float u = tl_u + col;\r
+     const float v = tl_v + row;\r
+\r
+     float src_x, src_y;\r
+     Mapper::mapBackward(u, v, src_x, src_y);\r
+\r
+     map_x.ptr(row)[col] = src_x;\r
+     map_y.ptr(row)[col] = src_y;\r
+ }\r
+\r
+\r
+ // Fills map_x / map_y with the spherical backward mapping.\r
+ //   tl_u, tl_v     - destination coordinate of the first map element\r
+ //   map_x, map_y   - output maps; the launch extent is taken from map_x\r
+ //   r, rinv        - 9 floats each, copied into constant memory\r
+ //   f, s           - copied into the cf and cs constants\r
+ //   half_w, half_h - copied into the chalf_w and chalf_h constants\r
+ //   stream         - stream the kernel is launched on; the call only\r
+ //                    blocks for completion when stream == 0\r
+ void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
+                             const float r[9], const float rinv[9], float f, float s,\r
+                             float half_w, float half_h, cudaStream_t stream)\r
+ {\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr, r, 9*sizeof(float)));\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::crinv, rinv, 9*sizeof(float)));\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cf, &f, sizeof(float)));\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cs, &s, sizeof(float)));\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::chalf_w, &half_w, sizeof(float)));\r
+     cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::chalf_h, &half_h, sizeof(float)));\r
+\r
+     int cols = map_x.cols;\r
+     int rows = map_x.rows;\r
+\r
+     dim3 threads(32, 8);\r
+     dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));\r
+\r
+     // Launch on the caller's stream. Previously the stream argument was\r
+     // ignored: the kernel always ran on the default stream, and for a\r
+     // non-zero stream no synchronization followed either.\r
+     buildWarpMapsKernel<SphericalMapper><<<grid, threads, 0, stream>>>(tl_u, tl_v, cols, rows, map_x, map_y);\r
+     cudaSafeCall(cudaGetLastError());\r
+     if (stream == 0)\r
+         cudaSafeCall(cudaDeviceSynchronize());\r
}\r
\r
- template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst);\r
- template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst);\r
\r
}}}\r
\r
#include "npp.h"\r
#include "NPP_staging.hpp"\r
\r
+#ifndef CV_PI_F\r
+ #ifndef CV_PI\r
+ #define CV_PI_F 3.14159265f\r
+ #else\r
+ #define CV_PI_F ((float)CV_PI)\r
+ #endif\r
+#endif\r
+\r
namespace cv\r
{\r
namespace gpu\r
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream));\r
}\r
\r
+namespace cv { namespace gpu { namespace mathfunc\r
+{\r
+ template <typename T>\r
+ void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);\r
+}}}\r
+\r
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)\r
{\r
- nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, StreamAccessor::getStream(stream));\r
+ if (src1.depth() == CV_16S && src2.depth() == CV_16S)\r
+ {\r
+ CV_Assert(src1.size() == src2.size());\r
+ dst.create(src1.size(), src1.type());\r
+ mathfunc::subtractCaller<short>(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));\r
+ }\r
+ else\r
+ nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, StreamAccessor::getStream(stream));\r
}\r
\r
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)\r
return thresh;\r
}\r
\r
-#endif
\ No newline at end of file
+#endif\r
Size src_size = src.size();\r
\r
dst.create(src_size, dstType);\r
- dstBuf.create(src_size, bufType);\r
+ ensureSizeIsEnough(src_size, bufType, dstBuf);\r
+ //dstBuf.create(src_size, bufType);\r
\r
if (stream)\r
{\r
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
\r
CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC1 || srcType == CV_16SC2 \r
- || srcType == CV_32SC1 || srcType == CV_32FC1);\r
+ || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1);\r
\r
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));\r
\r
case CV_16SC2:\r
func = filters::linearRowFilter_gpu<short2, float2>;\r
break;\r
+ case CV_16SC3:\r
+ func = filters::linearRowFilter_gpu<short3, float3>;\r
+ break;\r
case CV_32SC1:\r
func = filters::linearRowFilter_gpu<int, float>;\r
break;\r
int gpuBorderType;\r
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
\r
- CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2 \r
- || dstType == CV_32SC1 || dstType == CV_32FC1);\r
+ CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2\r
+ || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1);\r
\r
CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));\r
\r
case CV_16SC2:\r
func = filters::linearColumnFilter_gpu<float2, short2>;\r
break;\r
+ case CV_16SC3:\r
+ func = filters::linearColumnFilter_gpu<float3, short3>;\r
+ break;\r
case CV_32SC1:\r
func = filters::linearColumnFilter_gpu<float, int>;\r
break;\r
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
+void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double,\r
+ GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } /* stub: ignores its arguments and only calls throw_nogpu() */\r
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
-void cv::gpu::downsample(const GpuMat&, GpuMat&, int) { throw_nogpu(); }\r
+void cv::gpu::downsample(const GpuMat&, GpuMat&) { throw_nogpu(); } /* stubs: each ignores its arguments and only calls throw_nogpu() */\r
+void cv::gpu::upsample(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::pyrDown(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::pyrUp(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+\r
\r
\r
#else /* !defined (HAVE_CUDA) */\r
nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s));\r
}\r
\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// buildWarpSphericalMaps\r
+\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
+ const float r[9], const float rinv[9], float f, float s,\r
+ float half_w, float half_h, cudaStream_t stream);\r
+}}}\r
+\r
+// Builds the x / y maps for a spherical warp of an image of size src_size\r
+// over the destination rectangle dst_roi.\r
+//   R      - continuous 3x3 CV_32F matrix; its inverse is computed here\r
+//   f, s   - passed to the device code as floats\r
+//   map_x, map_y - (re)allocated as CV_32F with the size of dst_roi\r
+//   stream - forwarded to the device-side launcher\r
+void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
+                                     GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
+{\r
+    CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
+    Mat Rinv = R.inv();\r
+    CV_Assert(Rinv.isContinuous());\r
+\r
+    const Size map_size = dst_roi.size();\r
+    map_x.create(map_size, CV_32F);\r
+    map_y.create(map_size, CV_32F);\r
+\r
+    const Point origin = dst_roi.tl();\r
+    const float half_w = 0.5f*src_size.width;\r
+    const float half_h = 0.5f*src_size.height;\r
+\r
+    imgproc::buildWarpSphericalMaps(origin.x, origin.y, map_x, map_y,\r
+                                    R.ptr<float>(), Rinv.ptr<float>(),\r
+                                    static_cast<float>(f), static_cast<float>(s),\r
+                                    half_w, half_h, StreamAccessor::getStream(stream));\r
+}\r
+\r
////////////////////////////////////////////////////////////////////////\r
// rotate\r
\r
cufftSafeCall(cufftDestroy(planC2R));\r
}\r
\r
+\r
////////////////////////////////////////////////////////////////////\r
// downsample\r
\r
namespace cv { namespace gpu { namespace imgproc\r
{\r
- template <typename T>\r
- void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst);\r
+ template <typename T, int cn>\r
+ void downsampleCaller(const DevMem2D src, DevMem2D dst);\r
}}}\r
\r
-void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, int k)\r
+\r
+// Keeps every second pixel of every second row of src.\r
+// dst is (re)allocated as ((rows + 1) / 2) x ((cols + 1) / 2) with src's type.\r
+// Implemented depths are CV_8U, CV_16S and CV_32F with 1 to 4 channels;\r
+// other depths raise CV_StsUnsupportedFormat.\r
+void cv::gpu::downsample(const GpuMat& src, GpuMat& dst)\r
+{\r
+    // Keeps both indices of the dispatch table below in range.\r
+    CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
+\r
+    typedef void (*Caller)(const DevMem2D, DevMem2D);\r
+    // Indexed [depth][channels - 1]; zero rows mark depths with no kernel.\r
+    static const Caller callers[6][4] =\r
+    {{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,\r
+      imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},\r
+     {0,0,0,0}, {0,0,0,0},\r
+     {imgproc::downsampleCaller<short,1>, imgproc::downsampleCaller<short,2>,\r
+      imgproc::downsampleCaller<short,3>, imgproc::downsampleCaller<short,4>},\r
+     {0,0,0,0},\r
+     {imgproc::downsampleCaller<float,1>, imgproc::downsampleCaller<float,2>,\r
+      imgproc::downsampleCaller<float,3>, imgproc::downsampleCaller<float,4>}};\r
+\r
+    Caller caller = callers[src.depth()][src.channels()-1];\r
+    // A null entry can only come from an all-zero depth row, so report the\r
+    // depth (the previous message blamed the channel count).\r
+    if (!caller)\r
+        CV_Error(CV_StsUnsupportedFormat, "bad image depth in downsample function");\r
+\r
+    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
+    // dst is flattened to one channel so the kernel can index raw elements.\r
+    caller(src, dst.reshape(1));\r
+}\r
\r
- dst.create((src.rows + k - 1) / k, (src.cols + k - 1) / k, src.type());\r
\r
- switch (src.depth())\r
- {\r
- case CV_8U:\r
- imgproc::downsampleCaller<uchar>(src, dst.rows, dst.cols, k, dst);\r
- break;\r
- case CV_32F:\r
- imgproc::downsampleCaller<float>(src, dst.rows, dst.cols, k, dst);\r
- break;\r
- default:\r
- CV_Error(CV_StsUnsupportedFormat, "bad image depth in downsample function");\r
- }\r
+//////////////////////////////////////////////////////////////////////////////\r
+// upsample\r
+\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+ template <typename T, int cn>\r
+ void upsampleCaller(const DevMem2D src, DevMem2D dst);\r
+}}}\r
+\r
+\r
+// Doubles the size of src: dst is (re)allocated as (2 * rows) x (2 * cols)\r
+// with src's type and filled by imgproc::upsampleCaller.\r
+// Implemented depths are CV_8U, CV_16S and CV_32F with 1 to 4 channels;\r
+// other depths raise CV_StsUnsupportedFormat.\r
+void cv::gpu::upsample(const GpuMat& src, GpuMat& dst)\r
+{\r
+    // Keeps both indices of the dispatch table below in range.\r
+    CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
+\r
+    typedef void (*Caller)(const DevMem2D, DevMem2D);\r
+    // Indexed [depth][channels - 1]; zero rows mark depths with no kernel.\r
+    // Four columns, matching the four entries per row and the table in downsample.\r
+    static const Caller callers[6][4] =\r
+    {{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,\r
+      imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},\r
+     {0,0,0,0}, {0,0,0,0},\r
+     {imgproc::upsampleCaller<short,1>, imgproc::upsampleCaller<short,2>,\r
+      imgproc::upsampleCaller<short,3>, imgproc::upsampleCaller<short,4>},\r
+     {0,0,0,0},\r
+     {imgproc::upsampleCaller<float,1>, imgproc::upsampleCaller<float,2>,\r
+      imgproc::upsampleCaller<float,3>, imgproc::upsampleCaller<float,4>}};\r
+\r
+    Caller caller = callers[src.depth()][src.channels()-1];\r
+    // A null entry can only come from an all-zero depth row, so report the\r
+    // depth (the previous message blamed the channel count).\r
+    if (!caller)\r
+        CV_Error(CV_StsUnsupportedFormat, "bad image depth in upsample function");\r
+\r
+    dst.create(src.rows*2, src.cols*2, src.type());\r
+    // dst is flattened to one channel so the kernel can index raw elements.\r
+    caller(src, dst.reshape(1));\r
+}\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// pyrDown\r
+\r
+// Filters src with a separable 5-tap Gaussian kernel, then drops every\r
+// second row and column via downsample().\r
+void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst)\r
+{\r
+    // The kernel is built with at least CV_32F precision.\r
+    const int kernel_type = std::max(CV_32F, src.depth());\r
+    Mat gauss = getGaussianKernel(5, 0, kernel_type);\r
+\r
+    GpuMat smoothed;\r
+    sepFilter2D(src, smoothed, src.depth(), gauss, gauss);\r
+    downsample(smoothed, dst);\r
+}\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// pyrUp\r
+\r
+// Doubles the size of src with upsample(), then filters the result with a\r
+// separable 5-tap Gaussian kernel scaled by 2.\r
+void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst)\r
+{\r
+    GpuMat enlarged;\r
+    upsample(src, enlarged);\r
+\r
+    // The kernel is built with at least CV_32F precision.\r
+    const int kernel_type = std::max(CV_32F, src.depth());\r
+    Mat gauss = getGaussianKernel(5, 0, kernel_type) * 2;\r
+\r
+    sepFilter2D(enlarged, dst, enlarged.depth(), gauss, gauss);\r
}\r
\r
#endif /* !defined (HAVE_CUDA) */\r
void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)\r
{\r
if (m.type() == type && m.rows >= rows && m.cols >= cols)\r
- return;\r
- m.create(rows, cols, type);\r
+ {\r
+     // Same type and large enough: narrow m to its top-left rows x cols region.\r
+     const Rect wanted(0, 0, cols, rows);\r
+     m = m(wanted);\r
+ }\r
+ else\r
+ {\r
+     // Wrong type or too small: allocate exactly rows x cols.\r
+     m.create(rows, cols, type);\r
+ }\r
}\r
\r
\r
+ // Returns the element idx_low(i) strides away from data, saturated to D.\r
+ // step is a byte stride, so the address is formed on a char pointer; the\r
+ // value is read back as the source type T (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_low(int i, const T* data) const \r
{\r
- return saturate_cast<D>(data[idx_low(i) * step]);\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_low(i)*step));\r
}\r
\r
+ // Returns the element idx_high(i) strides away from data, saturated to D.\r
+ // step is a byte stride, so the address is formed on a char pointer; the\r
+ // value is read back as the source type T (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_high(int i, const T* data) const \r
{\r
- return saturate_cast<D>(data[idx_high(i) * step]);\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_high(i)*step));\r
}\r
\r
private:\r
+ // Returns the element idx_low(i) strides away from data, saturated to D.\r
+ // step is a byte stride, so the address is formed on a char pointer; the\r
+ // value is read back as the source type T (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_low(int i, const T* data) const \r
{\r
- return saturate_cast<D>(data[idx_low(i) * step]);\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_low(i)*step));\r
}\r
\r
+ // Returns the element idx_high(i) strides away from data, saturated to D.\r
+ // step is a byte stride, so the address is formed on a char pointer; the\r
+ // value is read back as the source type T (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_high(int i, const T* data) const \r
{\r
- return saturate_cast<D>(data[idx_high(i) * step]);\r
+ return saturate_cast<D>(*(const T*)((const char*)data + idx_high(i)*step));\r
}\r
\r
private:\r
+ // Returns the element i strides away from data, saturated to D, or the\r
+ // constant val when i is negative. step is a byte stride, so the address\r
+ // is formed on a char pointer; the value is read back as the source type T\r
+ // (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_low(int i, const T* data) const \r
{\r
- return i >= 0 ? saturate_cast<D>(data[i * step]) : val;\r
+ return i >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + i*step)) : val;\r
}\r
\r
+ // Returns the element i strides away from data, saturated to D, or the\r
+ // constant val when i is not below len. step is a byte stride, so the\r
+ // address is formed on a char pointer; the value is read back as the source\r
+ // type T (not D) before conversion.\r
template <typename T>\r
__device__ __forceinline__ D at_high(int i, const T* data) const \r
{\r
- return i < len ? saturate_cast<D>(data[i * step]) : val;\r
+ return i < len ? saturate_cast<D>(*(const T*)((const char*)data + i*step)) : val;\r
}\r
\r
bool is_range_safe(int mini, int maxi) const \r
int step;\r
D val;\r
};\r
+\r
+\r
+ // Constant-border reader for a 2D image of w x h elements of type OutT:\r
+ // coordinates inside the image return the stored element, anything\r
+ // outside returns the fill value given at construction (all zeros by default).\r
+ template <typename OutT>\r
+ struct BrdConstant\r
+ {\r
+     BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0))\r
+         : width_(w), height_(h), fill_(val)\r
+     {\r
+     }\r
+\r
+     // data points to the first row; step is the distance between rows in bytes.\r
+     __device__ __forceinline__ OutT at(int x, int y, const uchar* data, int step) const\r
+     {\r
+         const bool inside = (x >= 0) && (x < width_) && (y >= 0) && (y < height_);\r
+         if (!inside)\r
+             return fill_;\r
+\r
+         const OutT* row = (const OutT*)(data + y * step);\r
+         return row[x];\r
+     }\r
+\r
+ private:\r
+     int width_, height_;\r
+     OutT fill_;\r
+ };\r
+\r
}}}\r
\r
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__\r
\r
INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::ValuesIn(devices()));\r
\r
-////////////////////////////////////////////////////////////////////////////////\r
-// Downsample\r
-\r
-struct Downsample : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >\r
-{\r
- cv::gpu::DeviceInfo devInfo;\r
- int k;\r
-\r
- cv::Size size;\r
-\r
- cv::Size dst_gold_size;\r
-\r
- virtual void SetUp()\r
- {\r
- devInfo = std::tr1::get<0>(GetParam());\r
- k = std::tr1::get<1>(GetParam());\r
-\r
- cv::gpu::setDevice(devInfo.deviceID());\r
- \r
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();\r
-\r
- size = cv::Size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);\r
-\r
- dst_gold_size = cv::Size((size.width + k - 1) / k, (size.height + k - 1) / k);\r
- }\r
-};\r
-\r
-TEST_P(Downsample, Accuracy8U)\r
-{\r
- PRINT_PARAM(devInfo);\r
- PRINT_PARAM(size);\r
- PRINT_PARAM(k);\r
-\r
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();\r
-\r
- cv::Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false);\r
-\r
- cv::Mat dst;\r
-\r
- ASSERT_NO_THROW(\r
- cv::gpu::GpuMat gpures;\r
- cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k);\r
- gpures.download(dst);\r
- );\r
-\r
- ASSERT_EQ(dst_gold_size, dst.size());\r
-\r
- for (int y = 0; y < dst.rows; ++y)\r
- {\r
- for (int x = 0; x < dst.cols; ++x)\r
- {\r
- int gold = src.at<uchar>(y * k, x * k);\r
- int res = dst.at<uchar>(y, x);\r
- ASSERT_EQ(gold, res);\r
- }\r
- }\r
-}\r
-\r
-TEST_P(Downsample, Accuracy32F)\r
-{\r
- PRINT_PARAM(devInfo);\r
- PRINT_PARAM(size);\r
- PRINT_PARAM(k);\r
-\r
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();\r
-\r
- cv::Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1.0, false);\r
-\r
- cv::Mat dst;\r
-\r
- ASSERT_NO_THROW(\r
- cv::gpu::GpuMat gpures;\r
- cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k);\r
- gpures.download(dst);\r
- );\r
-\r
- ASSERT_EQ(dst_gold_size, dst.size());\r
-\r
- for (int y = 0; y < dst.rows; ++y)\r
- {\r
- for (int x = 0; x < dst.cols; ++x)\r
- {\r
- float gold = src.at<float>(y * k, x * k);\r
- float res = dst.at<float>(y, x);\r
- ASSERT_FLOAT_EQ(gold, res);\r
- }\r
- }\r
-}\r
-\r
-INSTANTIATE_TEST_CASE_P(ImgProc, Downsample, testing::Combine(\r
- testing::ValuesIn(devices()), \r
- testing::Range(2, 6)));\r
-\r
////////////////////////////////////////////////////////////////////////////////\r
// meanShift\r
\r
\r
static const float WEIGHT_EPS = 1e-5f;\r
\r
-Ptr<Blender> Blender::createDefault(int type)\r
+Ptr<Blender> Blender::createDefault(int type, bool try_gpu) /* factory: returns a new blender for type NO, FEATHER or MULTI_BAND; any other type raises CV_StsBadArg */\r
{\r
if (type == NO)\r
return new Blender();\r
if (type == FEATHER)\r
return new FeatherBlender();\r
if (type == MULTI_BAND)\r
- return new MultiBandBlender();\r
+ return new MultiBandBlender(try_gpu); /* try_gpu is only forwarded to the multi-band blender */\r
CV_Error(CV_StsBadArg, "unsupported blending method");\r
return NULL;\r
}\r
}\r
\r
\r
+// Stores the requested number of bands and decides whether the GPU path may\r
+// be used: only when the caller asked for it and the CUDA-enabled device\r
+// count is non-zero.\r
+MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands)\r
+{\r
+    setNumBands(num_bands);\r
+\r
+    // The device count is only queried when the caller asked for the GPU.\r
+    bool gpu_usable = false;\r
+    if (try_gpu)\r
+        gpu_usable = gpu::getCudaEnabledDeviceCount() != 0;\r
+    can_use_gpu_ = gpu_usable;\r
+}\r
+\r
+\r
void MultiBandBlender::prepare(Rect dst_roi)\r
{\r
dst_roi_final_ = dst_roi;\r
int right = br_new.x - tl.x - img.cols;\r
\r
// Create the source image Laplacian pyramid\r
- vector<Mat> src_pyr_gauss(num_bands_ + 1);\r
- copyMakeBorder(img, src_pyr_gauss[0], top, bottom, left, right, \r
+ Mat img_with_border;\r
+ copyMakeBorder(img, img_with_border, top, bottom, left, right,\r
BORDER_REFLECT);\r
- for (int i = 0; i < num_bands_; ++i)\r
- pyrDown(src_pyr_gauss[i], src_pyr_gauss[i + 1]);\r
vector<Mat> src_pyr_laplace;\r
- createLaplacePyr(src_pyr_gauss, src_pyr_laplace);\r
- src_pyr_gauss.clear();\r
+ if (can_use_gpu_)\r
+ createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace);\r
+ else\r
+ createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);\r
\r
// Create the weight map Gaussian pyramid\r
Mat weight_map;\r
}\r
x_tl /= 2; y_tl /= 2; \r
x_br /= 2; y_br /= 2;\r
- } \r
+ }\r
}\r
\r
\r
}\r
\r
\r
-void createLaplacePyr(const vector<Mat> &pyr_gauss, vector<Mat> &pyr_laplace)\r
+void createLaplacePyr(const Mat &img, int num_levels, vector<Mat> &pyr) /* fills pyr with num_levels + 1 entries: levels 0..num_levels-1 hold the level minus the upsampled next level, the last entry keeps the coarsest pyrDown result */\r
{\r
- if (pyr_gauss.size() == 0)\r
- return;\r
- pyr_laplace.resize(pyr_gauss.size());\r
+ pyr.resize(num_levels + 1);\r
+ pyr[0] = img; /* NOTE(review): assigned without clone(), so the in-place subtract below presumably writes into img's pixel buffer - confirm callers pass a scratch image */\r
+ for (int i = 0; i < num_levels; ++i) /* first pass: successive pyrDown calls fill pyr[1..num_levels] */\r
+ pyrDown(pyr[i], pyr[i + 1]);\r
Mat tmp;\r
- for (size_t i = 0; i < pyr_laplace.size() - 1; ++i)\r
+ for (int i = 0; i < num_levels; ++i) /* second pass: replace each level by its difference to the upsampled next level */\r
{\r
- pyrUp(pyr_gauss[i + 1], tmp, pyr_gauss[i].size());\r
- subtract(pyr_gauss[i], tmp, pyr_laplace[i]);\r
+ pyrUp(pyr[i + 1], tmp, pyr[i].size()); /* the explicit size makes tmp match pyr[i] */\r
+ subtract(pyr[i], tmp, pyr[i]); /* in place: pyr[i] is both input and output */\r
}\r
- pyr_laplace[pyr_laplace.size() - 1] = pyr_gauss[pyr_laplace.size() - 1].clone();\r
}\r
\r
\r
+// GPU counterpart of createLaplacePyr: builds the pyramid in GpuMat storage\r
+// with gpu::pyrDown / gpu::pyrUp / gpu::subtract and assigns every level\r
+// back into the host vector pyr (num_levels + 1 entries).\r
+// NOTE(review): gpu::pyrUp is called without a target size, so its output is\r
+// presumably exactly twice the coarser level; levels with odd dimensions\r
+// would then not match in gpu::subtract - confirm callers pad the input.\r
+void createLaplacePyrGpu(const Mat &img, int num_levels, vector<Mat> &pyr)\r
+{\r
+    pyr.resize(num_levels + 1);\r
+\r
+    // First pass: level 0 is the input image, each further level is the\r
+    // pyrDown of the previous one.\r
+    vector<gpu::GpuMat> levels(num_levels + 1);\r
+    levels[0] = img;\r
+    for (int lvl = 1; lvl <= num_levels; ++lvl)\r
+        gpu::pyrDown(levels[lvl - 1], levels[lvl]);\r
+\r
+    // Second pass: replace each level by its difference to the upsampled\r
+    // next level and copy it into the output vector.\r
+    gpu::GpuMat expanded;\r
+    for (int lvl = 0; lvl < num_levels; ++lvl)\r
+    {\r
+        gpu::pyrUp(levels[lvl + 1], expanded);\r
+        gpu::subtract(levels[lvl], expanded, levels[lvl]);\r
+        pyr[lvl] = levels[lvl];\r
+    }\r
+\r
+    // The coarsest level is stored unchanged.\r
+    pyr[num_levels] = levels[num_levels];\r
+}\r
+\r
+\r
+\r
void restoreImageFromLaplacePyr(vector<Mat> &pyr)\r
{\r
if (pyr.size() == 0)\r
// or tort (including negligence or otherwise) arising in any way out of\r
// the use of this software, even if advised of the possibility of such damage.\r
//\r
-//M*/
-#ifndef __OPENCV_BLENDERS_HPP__
-#define __OPENCV_BLENDERS_HPP__
-
-#include "precomp.hpp"
-
-// Simple blender which puts one image over another
-class Blender
-{
-public:
- enum { NO, FEATHER, MULTI_BAND };
- static cv::Ptr<Blender> createDefault(int type);
-
- void prepare(const std::vector<cv::Point> &corners, const std::vector<cv::Size> &sizes);
- virtual void prepare(cv::Rect dst_roi);
- virtual void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);
- virtual void blend(cv::Mat &dst, cv::Mat &dst_mask);
-
-protected:
- cv::Mat dst_, dst_mask_;
- cv::Rect dst_roi_;
-};
-
-
-class FeatherBlender : public Blender
-{
-public:
- FeatherBlender(float sharpness = 0.02f) { setSharpness(sharpness); }
- float sharpness() const { return sharpness_; }
- void setSharpness(float val) { sharpness_ = val; }
-
- void prepare(cv::Rect dst_roi);
- void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);
- void blend(cv::Mat &dst, cv::Mat &dst_mask);
-
-private:
- float sharpness_;
- cv::Mat weight_map_;
- cv::Mat dst_weight_map_;
-};
-
-
-class MultiBandBlender : public Blender
-{
-public:
- MultiBandBlender(int num_bands = 5) { setNumBands(num_bands); }
- int numBands() const { return actual_num_bands_; }
- void setNumBands(int val) { actual_num_bands_ = val; }
-
- void prepare(cv::Rect dst_roi);
- void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);
- void blend(cv::Mat &dst, cv::Mat &dst_mask);
-
-private:
- int actual_num_bands_, num_bands_;
- std::vector<cv::Mat> dst_pyr_laplace_;
- std::vector<cv::Mat> dst_band_weights_;
- cv::Rect dst_roi_final_;
-};
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Auxiliary functions
-
-void normalize(const cv::Mat& weight, cv::Mat& src);
-
-void createWeightMap(const cv::Mat& mask, float sharpness, cv::Mat& weight);
-
-void createLaplacePyr(const std::vector<cv::Mat>& pyr_gauss, std::vector<cv::Mat>& pyr_laplace);
-
-// Restores source image in-place (result will be stored in pyr[0])
-void restoreImageFromLaplacePyr(std::vector<cv::Mat>& pyr);
-
-#endif // __OPENCV_BLENDERS_HPP__
+//M*/\r
+#ifndef __OPENCV_BLENDERS_HPP__\r
+#define __OPENCV_BLENDERS_HPP__\r
+\r
+#include "precomp.hpp"\r
+\r
+// Simple blender which puts one image over another\r
// Base class of the blenders; prepare / feed / blend are virtual and are\r
// re-declared by FeatherBlender and MultiBandBlender.\r
+class Blender\r
+{\r
+public:\r
// Blender kinds passed as the 'type' argument of createDefault().\r
+ enum { NO, FEATHER, MULTI_BAND };\r
// Factory returning a blender for the given kind; try_gpu defaults to false.\r
+ static cv::Ptr<Blender> createDefault(int type, bool try_gpu = false);\r
+\r
// Non-virtual overload taking per-image corners and sizes (presumably derives the\r
// destination ROI and forwards to prepare(cv::Rect) -- confirm in the implementation).\r
+ void prepare(const std::vector<cv::Point> &corners, const std::vector<cv::Size> &sizes);\r
+ virtual void prepare(cv::Rect dst_roi);\r
// feed() takes one image, its mask and a top-left point; blend() writes the\r
// result image and mask into the output arguments.\r
+ virtual void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);\r
+ virtual void blend(cv::Mat &dst, cv::Mat &dst_mask);\r
+\r
+protected:\r
+ cv::Mat dst_, dst_mask_;\r
+ cv::Rect dst_roi_;\r
+};\r
+\r
+\r
// Blender variant parameterized by a single 'sharpness' value (default 0.02f).\r
+class FeatherBlender : public Blender\r
+{\r
+public:\r
// The constructor only stores the value through setSharpness().\r
+ FeatherBlender(float sharpness = 0.02f) { setSharpness(sharpness); }\r
+ float sharpness() const { return sharpness_; }\r
+ void setSharpness(float val) { sharpness_ = val; }\r
+\r
// Re-declarations of the virtual Blender interface.\r
+ void prepare(cv::Rect dst_roi);\r
+ void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);\r
+ void blend(cv::Mat &dst, cv::Mat &dst_mask);\r
+\r
+private:\r
+ float sharpness_;\r
// Weight buffers (presumably per-image and accumulated destination weights -- confirm).\r
+ cv::Mat weight_map_;\r
+ cv::Mat dst_weight_map_;\r
+};\r
+\r
+\r
// Blender variant working on Laplacian pyramids with a configurable number of bands.\r
+class MultiBandBlender : public Blender\r
+{\r
+public:\r
// NOTE(review): try_gpu is declared as int here although Blender::createDefault and\r
// Warper::createByCameraFocal take it as bool, and it now precedes num_bands, so a\r
// call such as MultiBandBlender(5) binds 5 to try_gpu -- confirm this is intended.\r
+ MultiBandBlender(int try_gpu = false, int num_bands = 5);\r
// The accessors read and write actual_num_bands_, not num_bands_.\r
+ int numBands() const { return actual_num_bands_; }\r
+ void setNumBands(int val) { actual_num_bands_ = val; }\r
+\r
// Re-declarations of the virtual Blender interface.\r
+ void prepare(cv::Rect dst_roi);\r
+ void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl);\r
+ void blend(cv::Mat &dst, cv::Mat &dst_mask);\r
+\r
+private:\r
+ int actual_num_bands_, num_bands_;\r
+ std::vector<cv::Mat> dst_pyr_laplace_;\r
+ std::vector<cv::Mat> dst_band_weights_;\r
+ cv::Rect dst_roi_final_;\r
// Checked by the implementation to choose createLaplacePyrGpu over createLaplacePyr.\r
+ bool can_use_gpu_;\r
+};\r
+\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// Auxiliary functions\r
+\r
+void normalize(const cv::Mat& weight, cv::Mat& src);\r
+\r
+void createWeightMap(const cv::Mat& mask, float sharpness, cv::Mat& weight);\r
+\r
// Both build a Laplacian pyramid of img into pyr (num_levels + 1 entries); the Gpu\r
// variant performs the pyrDown / pyrUp / subtract steps through the gpu:: functions.\r
+void createLaplacePyr(const cv::Mat &img, int num_levels, std::vector<cv::Mat>& pyr);\r
+void createLaplacePyrGpu(const cv::Mat &img, int num_levels, std::vector<cv::Mat>& pyr);\r
+\r
+// Restores source image in-place (result will be stored in pyr[0])\r
+void restoreImageFromLaplacePyr(std::vector<cv::Mat>& pyr);\r
+\r
+#endif // __OPENCV_BLENDERS_HPP__\r
//\r
//M*/\r
\r
-// We follow to methods described in these two papers:\r
+// We follow these papers:\r
// 1) Construction of panoramic mosaics with global and local alignment. \r
// Heung-Yeung Shum and Richard Szeliski. 2000.\r
// 2) Eliminating Ghosting and Exposure Artifacts in Image Mosaics. \r
\r
// Warp images and their masks\r
Ptr<Warper> warper = Warper::createByCameraFocal(static_cast<float>(warped_image_scale * seam_work_aspect), \r
- warp_type);\r
+ warp_type, try_gpu);\r
for (int i = 0; i < num_images; ++i)\r
{\r
corners[i] = warper->warp(images[i], static_cast<float>(cameras[i].focal * seam_work_aspect), \r
\r
// Update warped image scale\r
warped_image_scale *= static_cast<float>(compose_work_aspect);\r
- warper = Warper::createByCameraFocal(warped_image_scale, warp_type);\r
+ warper = Warper::createByCameraFocal(warped_image_scale, warp_type, try_gpu);\r
\r
// Update corners and sizes\r
for (int i = 0; i < num_images; ++i)\r
img_warped.convertTo(img_warped_s, CV_16S);\r
img_warped.release();\r
img.release();\r
- mask.release();\r
+ mask.release(); \r
\r
dilate(masks_warped[img_idx], dilated_mask, Mat());\r
resize(dilated_mask, seam_mask, mask_warped.size());\r
mask_warped = seam_mask & mask_warped;\r
\r
if (static_cast<Blender*>(blender) == 0)\r
- {\r
- blender = Blender::createDefault(blend_type);\r
+ { \r
+ blender = Blender::createDefault(blend_type, try_gpu);\r
Size dst_sz = resultRoi(corners, sizes).size();\r
float blend_width = sqrt(static_cast<float>(dst_sz.area())) * blend_strength / 100.f;\r
if (blend_width < 1.f)\r
- blender = Blender::createDefault(Blender::NO);\r
+ blender = Blender::createDefault(Blender::NO, try_gpu);\r
else if (blend_type == Blender::MULTI_BAND)\r
{\r
MultiBandBlender* mb = dynamic_cast<MultiBandBlender*>(static_cast<Blender*>(blender));\r
}\r
\r
// Blend the current image\r
- blender->feed(img_warped_s, mask_warped, corners[img_idx]);\r
+ blender->feed(img_warped_s, mask_warped, corners[img_idx]); \r
}\r
\r
Mat result, result_mask;\r
\r
namespace \r
{\r
- class PairLess\r
- {\r
- public:\r
- bool operator()(const pair<int,int>& l, const pair<int,int>& r) const\r
- {\r
- return l.first < r.first || (l.first == r.first && l.second < r.second);\r
- }\r
- };\r
- typedef set<pair<int,int>,PairLess> MatchesSet;\r
+ typedef set<pair<int,int> > MatchesSet;\r
\r
// These two classes are aimed to find features matches only, not to \r
// estimate homography\r
// or tort (including negligence or otherwise) arising in any way out of\r
// the use of this software, even if advised of the possibility of such damage.\r
//\r
-//M*/
-#include "warpers.hpp"
-
-using namespace std;
-using namespace cv;
-
-Ptr<Warper> Warper::createByCameraFocal(float focal, int type)
-{
- if (type == PLANE)
- return new PlaneWarper(focal);
- if (type == CYLINDRICAL)
- return new CylindricalWarper(focal);
- if (type == SPHERICAL)
- return new SphericalWarper(focal);
- CV_Error(CV_StsBadArg, "unsupported warping type");
- return NULL;
-}
-
-
-void ProjectorBase::setTransformation(const Mat &R)
-{
- CV_Assert(R.size() == Size(3, 3));
- CV_Assert(R.type() == CV_32F);
- r[0] = R.at<float>(0, 0); r[1] = R.at<float>(0, 1); r[2] = R.at<float>(0, 2);
- r[3] = R.at<float>(1, 0); r[4] = R.at<float>(1, 1); r[5] = R.at<float>(1, 2);
- r[6] = R.at<float>(2, 0); r[7] = R.at<float>(2, 1); r[8] = R.at<float>(2, 2);
-
- Mat Rinv = R.inv();
- rinv[0] = Rinv.at<float>(0, 0); rinv[1] = Rinv.at<float>(0, 1); rinv[2] = Rinv.at<float>(0, 2);
- rinv[3] = Rinv.at<float>(1, 0); rinv[4] = Rinv.at<float>(1, 1); rinv[5] = Rinv.at<float>(1, 2);
- rinv[6] = Rinv.at<float>(2, 0); rinv[7] = Rinv.at<float>(2, 1); rinv[8] = Rinv.at<float>(2, 2);
-}
-
-
-void PlaneWarper::detectResultRoi(Point &dst_tl, Point &dst_br)
-{
- float tl_uf = numeric_limits<float>::max();
- float tl_vf = numeric_limits<float>::max();
- float br_uf = -numeric_limits<float>::max();
- float br_vf = -numeric_limits<float>::max();
-
- float u, v;
-
- projector_.mapForward(0, 0, u, v);
- tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);
- br_uf = max(br_uf, u); br_vf = max(br_vf, v);
-
- projector_.mapForward(0, static_cast<float>(src_size_.height - 1), u, v);
- tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);
- br_uf = max(br_uf, u); br_vf = max(br_vf, v);
-
- projector_.mapForward(static_cast<float>(src_size_.width - 1), 0, u, v);
- tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);
- br_uf = max(br_uf, u); br_vf = max(br_vf, v);
-
- projector_.mapForward(static_cast<float>(src_size_.width - 1), static_cast<float>(src_size_.height - 1), u, v);
- tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);
- br_uf = max(br_uf, u); br_vf = max(br_vf, v);
-
- dst_tl.x = static_cast<int>(tl_uf);
- dst_tl.y = static_cast<int>(tl_vf);
- dst_br.x = static_cast<int>(br_uf);
- dst_br.y = static_cast<int>(br_vf);
-}
-
-
-void SphericalWarper::detectResultRoi(Point &dst_tl, Point &dst_br)
-{
- detectResultRoiByBorder(dst_tl, dst_br);
-
- float tl_uf = static_cast<float>(dst_tl.x);
- float tl_vf = static_cast<float>(dst_tl.y);
- float br_uf = static_cast<float>(dst_br.x);
- float br_vf = static_cast<float>(dst_br.y);
-
- float x = projector_.rinv[1];
- float y = projector_.rinv[4];
- float z = projector_.rinv[7];
- if (y > 0.f)
- {
- x = projector_.focal * x / z + src_size_.width * 0.5f;
- y = projector_.focal * y / z + src_size_.height * 0.5f;
- if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height)
- {
- tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast<float>(CV_PI * projector_.scale));
- br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast<float>(CV_PI * projector_.scale));
- }
- }
-
- x = projector_.rinv[1];
- y = -projector_.rinv[4];
- z = projector_.rinv[7];
- if (y > 0.f)
- {
- x = projector_.focal * x / z + src_size_.width * 0.5f;
- y = projector_.focal * y / z + src_size_.height * 0.5f;
- if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height)
- {
- tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast<float>(0));
- br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast<float>(0));
- }
- }
-
- dst_tl.x = static_cast<int>(tl_uf);
- dst_tl.y = static_cast<int>(tl_vf);
- dst_br.x = static_cast<int>(br_uf);
- dst_br.y = static_cast<int>(br_vf);
-}
+//M*/\r
+#include "warpers.hpp"\r
+\r
+using namespace std;\r
+using namespace cv;\r
+\r
// Factory: returns a warper of the requested type constructed from 'focal'.\r
// A GPU implementation is chosen only for SPHERICAL, and only when try_gpu is set\r
// and gpu::getCudaEnabledDeviceCount() is non-zero; PLANE and CYLINDRICAL ignore try_gpu.\r
+Ptr<Warper> Warper::createByCameraFocal(float focal, int type, bool try_gpu)\r
+{\r
+ bool can_use_gpu = try_gpu && gpu::getCudaEnabledDeviceCount();\r
+ if (type == PLANE)\r
+ return new PlaneWarper(focal);\r
+ if (type == CYLINDRICAL)\r
+ return new CylindricalWarper(focal);\r
+ if (type == SPHERICAL)\r
+ return !can_use_gpu ? new SphericalWarper(focal) : new SphericalWarperGpu(focal);\r
// Any other type value is reported through CV_Error; the return below is presumably\r
// there only to satisfy compilers that require a return on every path.\r
+ CV_Error(CV_StsBadArg, "unsupported warping type");\r
+ return NULL;\r
+}\r
+\r
+\r
// Caches the matrix R and its inverse as flat 9-element float arrays r[] and rinv[],\r
// stored row by row (index = 3 * row + col).\r
// R must be a 3x3 matrix of type CV_32F; both conditions are enforced with CV_Assert.\r
+void ProjectorBase::setTransformation(const Mat &R)\r
+{\r
+ CV_Assert(R.size() == Size(3, 3));\r
+ CV_Assert(R.type() == CV_32F);\r
+ r[0] = R.at<float>(0, 0); r[1] = R.at<float>(0, 1); r[2] = R.at<float>(0, 2);\r
+ r[3] = R.at<float>(1, 0); r[4] = R.at<float>(1, 1); r[5] = R.at<float>(1, 2);\r
+ r[6] = R.at<float>(2, 0); r[7] = R.at<float>(2, 1); r[8] = R.at<float>(2, 2);\r
+\r
// The inverse is computed once here with Mat::inv() and copied out the same way.\r
+ Mat Rinv = R.inv();\r
+ rinv[0] = Rinv.at<float>(0, 0); rinv[1] = Rinv.at<float>(0, 1); rinv[2] = Rinv.at<float>(0, 2);\r
+ rinv[3] = Rinv.at<float>(1, 0); rinv[4] = Rinv.at<float>(1, 1); rinv[5] = Rinv.at<float>(1, 2);\r
+ rinv[6] = Rinv.at<float>(2, 0); rinv[7] = Rinv.at<float>(2, 1); rinv[8] = Rinv.at<float>(2, 2);\r
+}\r
+\r
+\r
// Computes the destination bounding box from the four corners of the source image only:\r
// each corner is passed through projector_.mapForward and the min / max of the resulting\r
// (u, v) values become dst_tl / dst_br.\r
+void PlaneWarper::detectResultRoi(Point &dst_tl, Point &dst_br)\r
+{\r
// Running minima start at +max and running maxima at -max so the first corner sets both.\r
+ float tl_uf = numeric_limits<float>::max();\r
+ float tl_vf = numeric_limits<float>::max();\r
+ float br_uf = -numeric_limits<float>::max();\r
+ float br_vf = -numeric_limits<float>::max();\r
+\r
+ float u, v;\r
+\r
// Corner (0, 0).\r
+ projector_.mapForward(0, 0, u, v);\r
+ tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);\r
+ br_uf = max(br_uf, u); br_vf = max(br_vf, v);\r
+\r
// Corner (0, height - 1).\r
+ projector_.mapForward(0, static_cast<float>(src_size_.height - 1), u, v);\r
+ tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);\r
+ br_uf = max(br_uf, u); br_vf = max(br_vf, v);\r
+\r
// Corner (width - 1, 0).\r
+ projector_.mapForward(static_cast<float>(src_size_.width - 1), 0, u, v);\r
+ tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);\r
+ br_uf = max(br_uf, u); br_vf = max(br_vf, v);\r
+\r
// Corner (width - 1, height - 1).\r
+ projector_.mapForward(static_cast<float>(src_size_.width - 1), static_cast<float>(src_size_.height - 1), u, v);\r
+ tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v);\r
+ br_uf = max(br_uf, u); br_vf = max(br_vf, v);\r
+\r
// The float bounds are converted with static_cast<int>, i.e. truncated toward zero.\r
+ dst_tl.x = static_cast<int>(tl_uf);\r
+ dst_tl.y = static_cast<int>(tl_vf);\r
+ dst_br.x = static_cast<int>(br_uf);\r
+ dst_br.y = static_cast<int>(br_vf);\r
+}\r
+\r
+\r
// Starts from the ROI found by detectResultRoiByBorder and then widens it in two\r
// special cases derived from the second column of rinv (rinv[1], rinv[4], rinv[7]).\r
// NOTE(review): the two cases presumably test whether a pole of the sphere projects\r
// inside the source image -- confirm against the projector's mapping.\r
+void SphericalWarper::detectResultRoi(Point &dst_tl, Point &dst_br)\r
+{\r
+ detectResultRoiByBorder(dst_tl, dst_br);\r
+\r
+ float tl_uf = static_cast<float>(dst_tl.x);\r
+ float tl_vf = static_cast<float>(dst_tl.y);\r
+ float br_uf = static_cast<float>(dst_br.x);\r
+ float br_vf = static_cast<float>(dst_br.y);\r
+\r
// Case 1: direction (rinv[1], rinv[4], rinv[7]). If its y component is positive and its\r
// projection (focal * x / z, focal * y / z, shifted by half the image size) falls strictly\r
// inside the source image, the ROI is extended to contain u = 0 and v = CV_PI * scale.\r
+ float x = projector_.rinv[1];\r
+ float y = projector_.rinv[4];\r
+ float z = projector_.rinv[7];\r
+ if (y > 0.f)\r
+ {\r
+ x = projector_.focal * x / z + src_size_.width * 0.5f;\r
+ y = projector_.focal * y / z + src_size_.height * 0.5f;\r
+ if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height)\r
+ {\r
+ tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast<float>(CV_PI * projector_.scale));\r
+ br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast<float>(CV_PI * projector_.scale));\r
+ }\r
+ }\r
+\r
// Case 2: same test with only the y component negated (x and z keep their sign);\r
// on success the ROI is extended to contain u = 0 and v = 0.\r
+ x = projector_.rinv[1];\r
+ y = -projector_.rinv[4];\r
+ z = projector_.rinv[7];\r
+ if (y > 0.f)\r
+ {\r
+ x = projector_.focal * x / z + src_size_.width * 0.5f;\r
+ y = projector_.focal * y / z + src_size_.height * 0.5f;\r
+ if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height)\r
+ {\r
+ tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast<float>(0));\r
+ br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast<float>(0));\r
+ }\r
+ }\r
+\r
// Write the (possibly widened) bounds back, truncating to int.\r
+ dst_tl.x = static_cast<int>(tl_uf);\r
+ dst_tl.y = static_cast<int>(tl_vf);\r
+ dst_br.x = static_cast<int>(br_uf);\r
+ dst_br.y = static_cast<int>(br_vf);\r
+}\r
+\r
+\r
// Warps src into dst using spherical maps built by gpu::buildWarpSphericalMaps and\r
// returns the top-left corner of the destination ROI.\r
// R goes through projector_.setTransformation, which asserts a 3x3 CV_32F matrix.\r
+Point SphericalWarperGpu::warp(const Mat &src, float focal, const Mat &R, Mat &dst,\r
+ int interp_mode, int border_mode)\r
+{\r
+ src_size_ = src.size();\r
+ projector_.size = src.size();\r
+ projector_.focal = focal;\r
+ projector_.setTransformation(R);\r
+\r
+ cv::Point dst_tl, dst_br;\r
+ detectResultRoi(dst_tl, dst_br);\r
+\r
// dst_br is treated as inclusive: the Rect's second point is shifted by +1 in x and y,\r
// matching the "+ 1" used for the dst.create() dimensions below.\r
+ gpu::buildWarpSphericalMaps(src.size(), Rect(dst_tl, Point(dst_br.x+1, dst_br.y+1)),\r
+ R, focal, projector_.scale, d_xmap_, d_ymap_);\r
+\r
// NOTE(review): the GpuMat maps are converted to cv::Mat and remap is called unqualified,\r
// so only the map construction appears to run on the GPU while the remap itself is the\r
// host version -- confirm this is intended.\r
+ dst.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type());\r
+ remap(src, dst, Mat(d_xmap_), Mat(d_ymap_), interp_mode, border_mode);\r
+\r
+ return dst_tl;\r
+}\r
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
+ /*M///////////////////////////////////////////////////////////////////////////////////////\r
//\r
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
//\r
{\r
public:\r
enum { PLANE, CYLINDRICAL, SPHERICAL };\r
- static cv::Ptr<Warper> createByCameraFocal(float focal, int type);\r
+ static cv::Ptr<Warper> createByCameraFocal(float focal, int type, bool try_gpu = false);\r
\r
virtual ~Warper() {}\r
virtual cv::Point warp(const cv::Mat &src, float focal, const cv::Mat& R, cv::Mat &dst,\r
class WarperBase : public Warper\r
{ \r
public:\r
- cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst,\r
- int interp_mode, int border_mode);\r
+ virtual cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst,\r
+ int interp_mode, int border_mode);\r
\r
- cv::Rect warpRoi(const cv::Size &sz, float focal, const cv::Mat &R);\r
+ virtual cv::Rect warpRoi(const cv::Size &sz, float focal, const cv::Mat &R);\r
\r
protected:\r
// Detects ROI of the destination image. It's correct for any projection.\r
{\r
void mapForward(float x, float y, float &u, float &v);\r
void mapBackward(float u, float v, float &x, float &y);\r
-\r
float plane_dist;\r
};\r
\r
public:\r
SphericalWarper(float scale = 300.f) { projector_.scale = scale; }\r
\r
-private: \r
+protected:\r
void detectResultRoi(cv::Point &dst_tl, cv::Point &dst_br);\r
};\r
\r
\r
// SphericalWarper subclass that re-declares warp(); its implementation builds the\r
// warping maps with gpu::buildWarpSphericalMaps.\r
+class SphericalWarperGpu : public SphericalWarper\r
+{\r
+public:\r
// Forwards scale (default 300.f) to the SphericalWarper constructor.\r
+ SphericalWarperGpu(float scale = 300.f) : SphericalWarper(scale) {}\r
+ cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst,\r
+ int interp_mode, int border_mode);\r
+\r
+private:\r
// Device-side map buffers kept as members between calls.\r
// NOTE(review): d_dst_ is not referenced by the warp() implementation -- confirm it is needed.\r
+ cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_;\r
+};\r
+\r
+\r
struct CylindricalProjector : ProjectorBase\r
{\r
void mapForward(float x, float y, float &u, float &v);\r
GPU_OFF;\r
}\r
}\r
+\r
+\r
// Performance comparison of cv::GaussianBlur and gpu::GaussianBlur with a 5x5 kernel\r
// and sigma argument 0, on square images of side 1000, 4000 and 7000, for two pixel\r
// types (CV_16SC3, then CV_8UC4).\r
// CPU_ON/CPU_OFF and GPU_ON/GPU_OFF presumably start and stop the timers of the test\r
// framework -- confirm against the macro definitions.\r
+TEST(GaussianBlur)\r
+{\r
+ for (int size = 1000; size < 10000; size += 3000)\r
+ {\r
+ SUBTEST << "16SC3, size " << size;\r
+\r
// gen() fills src (presumably with random values in the given range -- confirm);\r
// dst is allocated up front, before the CPU_ON / CPU_OFF pair.\r
+ Mat src; gen(src, size, size, CV_16SC3, 0, 256);\r
+ Mat dst(src.size(), src.type());\r
+\r
+ CPU_ON;\r
+ GaussianBlur(src, dst, Size(5,5), 0);\r
+ CPU_OFF;\r
+\r
// The GpuMat construction from src and the destination allocation happen before\r
// GPU_ON, so only the gpu::GaussianBlur call sits between GPU_ON and GPU_OFF.\r
+ gpu::GpuMat d_src(src);\r
+ gpu::GpuMat d_dst(src.size(), src.type());\r
+\r
+ GPU_ON;\r
+ gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0);\r
+ GPU_OFF;\r
+ }\r
+\r
// Same measurement repeated for 4-channel 8-bit images.\r
+ for (int size = 1000; size < 10000; size += 3000)\r
+ {\r
+ SUBTEST << "8UC4, size " << size;\r
+\r
+ Mat src; gen(src, size, size, CV_8UC4, 0, 256);\r
+ Mat dst(src.size(), src.type());\r
+\r
+ CPU_ON;\r
+ GaussianBlur(src, dst, Size(5,5), 0);\r
+ CPU_OFF;\r
+\r
+ gpu::GpuMat d_src(src);\r
+ gpu::GpuMat d_dst(src.size(), src.type());\r
+\r
+ GPU_ON;\r
+ gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0);\r
+ GPU_OFF;\r
+ }\r
+}\r