CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());\r
\r
//! resizes the image\r
-//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
+//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA\r
CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());\r
\r
+//! resizes the image using a caller-provided intermediate buffer\r
+//! Supports only INTER_AREA with downscaling factors (fx < 1 and fy < 1)\r
+CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer, double fx=0, double fy=0, int interpolation = INTER_AREA, Stream& stream = Stream::Null());\r
+\r
//! warps the image using affine transformation\r
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR,\r
Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)),\r
testing::Values(Scale(0.5), Scale(0.3), Scale(2.0))));\r
\r
+GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale)\r
+{\r
+    // Select the device given by the first test parameter.\r
+    cv::gpu::DeviceInfo info = GET_PARAM(0);\r
+    cv::gpu::setDevice(info.deviceID());\r
+\r
+    // Remaining parameters: source size, matrix type and the factor applied to both axes.\r
+    const cv::Size srcSize = GET_PARAM(1);\r
+    const int matType = GET_PARAM(2);\r
+    const double scale = GET_PARAM(3);\r
+    const int interp = cv::INTER_AREA;\r
+\r
+    // Host image filled through fill(..., 0, 255), then uploaded to the device.\r
+    cv::Mat hostSrc(srcSize, matType);\r
+    fill(hostSrc, 0, 255);\r
+\r
+    cv::gpu::GpuMat d_src(hostSrc);\r
+    cv::gpu::GpuMat d_dst;\r
+\r
+    // Untimed call issued before the measurement loop.\r
+    cv::gpu::resize(d_src, d_dst, cv::Size(), scale, scale, interp);\r
+\r
+    declare.time(1.0);\r
+\r
+    TEST_CYCLE()\r
+    {\r
+        cv::gpu::resize(d_src, d_dst, cv::Size(), scale, scale, interp);\r
+    }\r
+}\r
+\r
+// Instantiates ResizeArea over ALL_DEVICES, two source sizes and three scale factors.\r
+// Only CV_8UC1 is enabled; the other matrix types are commented out in the list below.\r
+INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine(\r
+ ALL_DEVICES,\r
+ testing::Values(perf::sz1080p, cv::Size(4096, 2048)),\r
+ testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4),\r
+ MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),\r
+ MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/),\r
+ testing::Values(Scale(0.2),Scale(0.1),Scale(0.05))));\r
+\r
//////////////////////////////////////////////////////////////////////\r
// WarpAffine\r
\r
Interpolation(cv::INTER_CUBIC), Interpolation(cv::INTER_AREA)),\r
testing::Values(Scale(0.5), Scale(0.3), Scale(2.0))));\r
\r
+GPU_PERF_TEST(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, Scale)\r
+{\r
+    // Times cv::resize on host matrices; the device parameter (index 0) is not read here.\r
+    const cv::Size srcSize = GET_PARAM(1);\r
+    const int matType = GET_PARAM(2);\r
+    const double scale = GET_PARAM(3);\r
+    const int interp = cv::INTER_AREA;\r
+\r
+    // Source image filled through fill(..., 0, 255).\r
+    cv::Mat hostSrc(srcSize, matType);\r
+    fill(hostSrc, 0, 255);\r
+\r
+    cv::Mat input(hostSrc);\r
+    cv::Mat output;\r
+\r
+    // Untimed call issued before the measurement loop.\r
+    cv::resize(input, output, cv::Size(), scale, scale, interp);\r
+\r
+    declare.time(1.0);\r
+\r
+    TEST_CYCLE()\r
+    {\r
+        cv::resize(input, output, cv::Size(), scale, scale, interp);\r
+    }\r
+}\r
+\r
+// Instantiates ResizeArea over ALL_DEVICES, two source sizes and three scale factors.\r
+// Only CV_8UC1 is enabled; the other matrix types are commented out in the list below.\r
+INSTANTIATE_TEST_CASE_P(ImgProc, ResizeArea, testing::Combine(\r
+ ALL_DEVICES,\r
+ testing::Values(perf::sz1080p, cv::Size(4096, 2048)),\r
+ testing::Values(MatType(CV_8UC1)/*, MatType(CV_8UC3), MatType(CV_8UC4),\r
+ MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),\r
+ MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)*/),\r
+ testing::Values(Scale(0.2),Scale(0.1),Scale(0.05))));\r
+\r
//////////////////////////////////////////////////////////////////////\r
// WarpAffine\r
\r
{\r
dim3 block(32, 8);\r
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
-\r
BrdConstant<T> brd(src.rows, src.cols);\r
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);\r
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);\r
//template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);\r
+\r
+ // Trait mapping a pixel element type T to scan_line_type, the element type the\r
+ // resize_area_scan kernel uses for its shared scan-line storage.\r
+ // The primary template is intentionally empty: unsupported types fail to compile.\r
+ template <typename T>\r
+ struct scan_traits\r
+ {\r
+ };\r
+\r
+ // uchar pixels use int scan lines.\r
+ template <>\r
+ struct scan_traits<uchar>\r
+ {\r
+     typedef int scan_line_type;\r
+ };\r
+\r
+ // Kernel for the area-resize scan step, parameterised on the source accessor type\r
+ // (Ptr2D) and the pixel element type (T).\r
+ // NOTE(review): the body is currently a stub. It only declares the dynamic shared\r
+ // array and computes the thread's global x/y indices; nothing is read from src and\r
+ // nothing is written to dst or buffer, and x, y and line are unused.\r
+ template <typename Ptr2D, typename T>\r
+ __global__ void resize_area_scan(const Ptr2D src, int fx, int fy, DevMem2D_<T> dst, DevMem2D_<T> buffer)\r
+ {\r
+ // Element type of the shared scan line, selected through scan_traits<T>.\r
+ typedef typename scan_traits<T>::scan_line_type W;\r
+ // Dynamically sized shared storage; its byte count is supplied at launch.\r
+ extern __shared__ W line[];\r
+\r
+ // Global coordinates of this thread within the launch grid.\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+ }\r
+\r
+ // Launches the resize_area_scan kernel for pixel element type T.\r
+ //\r
+ // Launch layout: 1-D blocks of 256 threads, a single grid row covering dst.cols\r
+ // columns, and 2 * 256 scan_line_type elements of dynamic shared memory per block.\r
+ // The kernel is enqueued on `stream`; when the default stream (0) is passed, the\r
+ // call additionally waits for the device to finish before returning.\r
+ template <typename T> struct InterAreaDispatcherStream\r
+ {\r
+     static void call(DevMem2D_<T> src, int fx, int fy, DevMem2D_<T> dst, DevMem2D_<T> buffer, cudaStream_t stream)\r
+     {\r
+         typedef typename scan_traits<T>::scan_line_type W;\r
+\r
+         const int threads = 256;\r
+         const dim3 block(threads, 1);\r
+         const dim3 grid(divUp(dst.cols, block.x), 1);\r
+         const size_t smem_size = threads * 2 * sizeof(W);\r
+\r
+         // The stream must be passed as the fourth launch argument; without it the kernel\r
+         // runs on the default stream regardless of what the caller requested.\r
+         resize_area_scan<<<grid, block, smem_size, stream>>>(src, fx, fy, dst, buffer);\r
+         cudaSafeCall( cudaGetLastError() );\r
+\r
+         // Keep blocking semantics only for the default stream.\r
+         if (stream == 0)\r
+             cudaSafeCall( cudaDeviceSynchronize() );\r
+     }\r
+ };\r
+\r
+ // Host-side entry point for area resize of element type T.\r
+ // Rounds the floating-point factors fx and fy to integers and forwards everything to\r
+ // InterAreaDispatcherStream<T>::call. The interpolation argument is not used.\r
+ template <typename T>\r
+ void resize_area_gpu(DevMem2Db src, DevMem2Db dst,float fx, float fy,\r
+     int interpolation, DevMem2Db buffer, cudaStream_t stream)\r
+ {\r
+     (void)interpolation;\r
+\r
+     // Integer scale factors passed on to the kernel launcher.\r
+     const int scale_x = round(fx);\r
+     const int scale_y = round(fy);\r
+\r
+     InterAreaDispatcherStream<T>::call(src, scale_x, scale_y, dst, buffer, stream);\r
+ }\r
+\r
+ template void resize_area_gpu<uchar>(DevMem2Db src, DevMem2Db dst, float fx, float fy, int interpolation, DevMem2Db buffer, cudaStream_t stream);\r
+\r
} // namespace imgproc\r
}}} // namespace cv { namespace gpu { namespace device\r
\r
#ifndef HAVE_CUDA\r
\r
-void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
+// Variant compiled when HAVE_CUDA is not defined: ignores every argument and calls throw_nogpu().\r
+void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&)\r
+{\r
+    throw_nogpu();\r
+}\r
+// Buffered overload compiled when HAVE_CUDA is not defined: ignores every argument and\r
+// calls throw_nogpu(). The parameter list must match the declared overload\r
+// (src, dst, dsize, buffer, fx, fy, interpolation, stream); otherwise this qualified\r
+// definition does not correspond to any declared function.\r
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer, double fx, double fy,\r
+    int interpolation, Stream& s)\r
+{\r
+    (void)src;\r
+    (void)dst;\r
+    (void)dsize;\r
+    (void)buffer;\r
+    (void)fx;\r
+    (void)fy;\r
+    (void)interpolation;\r
+    (void)s;\r
+\r
+    throw_nogpu();\r
+}\r
\r
#else // HAVE_CUDA\r
\r
template <typename T>\r
void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,\r
DevMem2Db dst, int interpolation, cudaStream_t stream);\r
+\r
+ // Forward declaration of the area-resize launcher called by the buffered cv::gpu::resize overload.\r
+ template <typename T>\r
+ void resize_area_gpu(DevMem2Db src, DevMem2Db dst,float fx, float fy,\r
+ int interpolation, DevMem2Db buffer, cudaStream_t stream);\r
}\r
}}}\r
\r
+// Downscales an 8-bit single-channel image with INTER_AREA, using a caller-provided buffer.\r
+//\r
+// src            source image; must be CV_8UC1 because only the uchar launcher is dispatched\r
+// dst            destination image, (re)allocated to dsize with the type of src\r
+// dsize          requested output size; when empty it is derived from fx and fy\r
+// buffer         intermediate image, (re)allocated to dsize.width x src.rows when a resize is performed\r
+// fx, fy         scale factors; must be < 1, and > 0 when dsize is empty\r
+// interpolation  must be INTER_AREA\r
+// s              stream used for the copy / kernel launch\r
+//\r
+// Raises a CV_Assert failure on unsupported type, interpolation or geometry.\r
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, GpuMat& buffer, double fx, double fy,\r
+    int interpolation, Stream& s)\r
+{\r
+    // Only resize_area_gpu<uchar> is called below, so any other type would be misinterpreted.\r
+    CV_Assert(src.type() == CV_8UC1);\r
+    CV_Assert(interpolation == INTER_AREA);\r
+    CV_Assert((fx < 1.0) && (fy < 1.0));\r
+    CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));\r
+\r
+    if (dsize == Size())\r
+        dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));\r
+    else\r
+    {\r
+        fx = static_cast<double>(dsize.width) / src.cols;\r
+        fy = static_cast<double>(dsize.height) / src.rows;\r
+    }\r
+\r
+    // Validate the effective geometry: the output must be non-empty, and factors derived\r
+    // from an explicit dsize must not describe an upscale (the checks above only saw the\r
+    // caller-supplied fx/fy).\r
+    CV_Assert(dsize.width > 0 && dsize.height > 0);\r
+    CV_Assert(fx <= 1.0 && fy <= 1.0);\r
+\r
+    dst.create(dsize, src.type());\r
+\r
+    // Same size requested: a plain copy is enough and no intermediate buffer is needed.\r
+    if (dsize == src.size())\r
+    {\r
+        if (s)\r
+            s.enqueueCopy(src, dst);\r
+        else\r
+            src.copyTo(dst);\r
+        return;\r
+    }\r
+\r
+    buffer.create(cv::Size(dsize.width, src.rows), src.type());\r
+\r
+    // The launcher expects source-over-destination ratios as floats.\r
+    const float inv_fx = static_cast<float>(1.0 / fx);\r
+    const float inv_fy = static_cast<float>(1.0 / fy);\r
+\r
+    cudaStream_t stream = StreamAccessor::getStream(s);\r
+\r
+    cv::gpu::device::imgproc::resize_area_gpu<uchar>(src, dst, inv_fx, inv_fy, interpolation, buffer, stream);\r
+}\r
+\r
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
{\r
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r