.. ocv:function:: void gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null())
- :param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported.
+ :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` and ``CV_32FC1`` source types are supported.
:param dst: Destination image. The size and the number of channels is the same as ``src`` .
INSTANTIATE_TEST_CASE_P(Filter, LinearFilter, testing::Combine(\r
ALL_DEVICES, \r
GPU_TYPICAL_MAT_SIZES, \r
- testing::Values(CV_8UC1, CV_8UC4),\r
- testing::Values(3, 5)));\r
+ testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),\r
+ testing::Values(3, 5, 7, 9)));\r
\r
//////////////////////////////////////////////////////////////////////\r
// SeparableLinearFilter\r
//////////////////////////////////////////////////////////////////////\r
// Convolve\r
\r
-GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int)\r
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)\r
{\r
cv::gpu::DeviceInfo devInfo = GET_PARAM(0);\r
cv::Size size = GET_PARAM(1);\r
int templ_size = GET_PARAM(2);\r
+ bool ccorr = GET_PARAM(3);\r
\r
cv::gpu::setDevice(devInfo.deviceID());\r
\r
\r
TEST_CYCLE()\r
{\r
- cv::gpu::convolve(image, templ, dst, false, buf);\r
+ cv::gpu::convolve(image, templ, dst, ccorr, buf);\r
}\r
}\r
\r
INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(\r
ALL_DEVICES, \r
GPU_TYPICAL_MAT_SIZES,\r
- testing::Values(3, 9, 27, 32, 64)));\r
+ testing::Values(3, 9, 27, 32, 64),\r
+ testing::Bool()));\r
\r
//////////////////////////////////////////////////////////////////////\r
// PyrDown\r
cudaSafeCall(cudaDeviceSynchronize());\r
}\r
\r
-\r
//////////////////////////////////////////////////////////////////////////\r
- // convolve\r
+ // filter2D\r
\r
- #define CONVOLVE_MAX_KERNEL_SIZE 17\r
+ #define FILTER2D_MAX_KERNEL_SIZE 16\r
\r
- __constant__ float c_convolveKernel[CONVOLVE_MAX_KERNEL_SIZE * CONVOLVE_MAX_KERNEL_SIZE];\r
+ __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];\r
\r
- __global__ void convolve(const DevMem2Df src, PtrStepf dst, int kWidth, int kHeight)\r
- {\r
- __shared__ float smem[16 + 2 * 8][16 + 2 * 8];\r
+ texture<float, cudaTextureType2D, cudaReadModeElementType> filter2DTex(0, cudaFilterModePoint, cudaAddressModeBorder);\r
\r
+ __global__ void filter2D(int ofsX, int ofsY, DevMem2Df dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)\r
+ {\r
const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
\r
- // x | x 0 | 0\r
- // -----------\r
- // x | x 0 | 0\r
- // 0 | 0 0 | 0\r
- // -----------\r
- // 0 | 0 0 | 0\r
- smem[threadIdx.y][threadIdx.x] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];\r
-\r
- // 0 | 0 x | x\r
- // -----------\r
- // 0 | 0 x | x\r
- // 0 | 0 0 | 0\r
- // -----------\r
- // 0 | 0 0 | 0\r
- smem[threadIdx.y][threadIdx.x + 16] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(x + 8, src.cols - 1)];\r
-\r
- // 0 | 0 0 | 0\r
- // -----------\r
- // 0 | 0 0 | 0\r
- // x | x 0 | 0\r
- // -----------\r
- // x | x 0 | 0\r
- smem[threadIdx.y + 16][threadIdx.x] = src.ptr(::min(y + 8, src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];\r
-\r
- // 0 | 0 0 | 0\r
- // -----------\r
- // 0 | 0 0 | 0\r
- // 0 | 0 x | x\r
- // -----------\r
- // 0 | 0 x | x\r
- smem[threadIdx.y + 16][threadIdx.x + 16] = src.ptr(::min(y + 8, src.rows - 1))[::min(x + 8, src.cols - 1)];\r
-\r
- __syncthreads();\r
-\r
- if (x < src.cols && y < src.rows)\r
- {\r
- float res = 0;\r
+ if (x >= dst.cols || y >= dst.rows)\r
+ return;\r
\r
- for (int i = 0; i < kHeight; ++i)\r
- {\r
- for (int j = 0; j < kWidth; ++j)\r
- {\r
- res += smem[threadIdx.y + 8 - kHeight / 2 + i][threadIdx.x + 8 - kWidth / 2 + j] * c_convolveKernel[i * kWidth + j];\r
- }\r
- }\r
+ float res = 0;\r
+\r
+ const int baseX = ofsX + x - anchorX;\r
+ const int baseY = ofsY + y - anchorY;\r
+\r
+ int kInd = 0;\r
\r
- dst.ptr(y)[x] = res;\r
+ for (int i = 0; i < kHeight; ++i)\r
+ {\r
+ for (int j = 0; j < kWidth; ++j)\r
+ res += tex2D(filter2DTex, baseX + j, baseY + i) * c_filter2DKernel[kInd++];\r
}\r
+\r
+ dst.ptr(y)[x] = res;\r
}\r
\r
- void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream)\r
+ void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream)\r
{\r
- cudaSafeCall(cudaMemcpyToSymbol(c_convolveKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );\r
+ cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );\r
\r
const dim3 block(16, 16);\r
- const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));\r
+ const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+ bindTexture(&filter2DTex, src);\r
\r
- convolve<<<grid, block, 0, stream>>>(src, dst, kWidth, kHeight);\r
+ filter2D<<<grid, block, 0, stream>>>(ofsX, ofsY, dst, kWidth, kHeight, anchorX, anchorY);\r
cudaSafeCall(cudaGetLastError());\r
\r
if (stream == 0)\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
// Linear Filter\r
\r
+namespace cv { namespace gpu { namespace device \r
+{\r
+ namespace imgproc\r
+ {\r
+ void filter2D_gpu(DevMem2Df src, int ofsX, int ofsY, DevMem2Df dst, int kWidth, int kHeight, int anchorX, int anchorY, float* kernel, cudaStream_t stream);\r
+ }\r
+}}}\r
+\r
namespace\r
{\r
typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
Npp32s nDivisor;\r
nppFilter2D_t func;\r
};\r
+\r
+ struct GpuLinearFilter : public BaseFilter_GPU\r
+ {\r
+ GpuLinearFilter(Size ksize_, Point anchor_, const GpuMat& kernel_) : \r
+ BaseFilter_GPU(ksize_, anchor_), kernel(kernel_) {}\r
+ \r
+ virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())\r
+ {\r
+ using namespace cv::gpu::device::imgproc;\r
+\r
+ Point ofs;\r
+ Size wholeSize;\r
+ src.locateROI(wholeSize, ofs);\r
+ GpuMat srcWhole(wholeSize, src.type(), src.datastart);\r
+\r
+ filter2D_gpu(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), StreamAccessor::getStream(stream));\r
+ }\r
+\r
+ GpuMat kernel;\r
+ };\r
}\r
\r
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, Point anchor)\r
{\r
- static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};\r
+ CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1);\r
+ CV_Assert(dstType == srcType);\r
+\r
+ if (srcType == CV_32FC1)\r
+ {\r
+ CV_Assert(ksize.width * ksize.height <= 16 * 16);\r
+\r
+ GpuMat gpu_krnl;\r
+ normalizeKernel(kernel, gpu_krnl, CV_32F);\r
\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new GpuLinearFilter(ksize, anchor, gpu_krnl));\r
+ }\r
+ else\r
+ {\r
+ static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};\r
\r
- GpuMat gpu_krnl;\r
- int nDivisor;\r
- normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);\r
- normalizeAnchor(anchor, ksize);\r
+ GpuMat gpu_krnl;\r
+ int nDivisor;\r
+ normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);\r
\r
- return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));\r
+ } \r
} \r
\r
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)\r
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
\r
Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);\r
- f->apply(src, dst, Rect(0, 0, -1, -1), stream);\r
+\r
+ f->apply(src, dst, src.type() == CV_32FC1 ? Rect(0, 0, src.cols, src.rows) : Rect(0, 0, -1, -1), stream);\r
}\r
\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
convolve(image, templ, result, ccorr, buf);\r
}\r
\r
-namespace cv { namespace gpu { namespace device \r
-{\r
- namespace imgproc\r
- {\r
- void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);\r
- }\r
-}}}\r
-\r
void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)\r
{\r
using namespace ::cv::gpu::device::imgproc;\r
\r
#ifndef HAVE_CUFFT\r
-\r
- CV_Assert(image.type() == CV_32F);\r
- CV_Assert(templ.type() == CV_32F);\r
- CV_Assert(templ.cols <= 17 && templ.rows <= 17);\r
- \r
- result.create(image.size(), CV_32F);\r
-\r
- GpuMat& contKernel = buf.templ_block;\r
-\r
- if (templ.isContinuous())\r
- contKernel = templ;\r
- else\r
- {\r
- contKernel = createContinuous(templ.size(), templ.type());\r
-\r
- if (stream)\r
- stream.enqueueCopy(templ, contKernel);\r
- else\r
- templ.copyTo(contKernel);\r
- }\r
-\r
- convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));\r
-\r
+ throw_nogpu();\r
#else\r
-\r
StaticAssert<sizeof(float) == sizeof(cufftReal)>::check();\r
StaticAssert<sizeof(float) * 2 == sizeof(cufftComplex)>::check();\r
\r
CV_Assert(image.type() == CV_32F);\r
CV_Assert(templ.type() == CV_32F);\r
\r
- if (templ.cols < 13 && templ.rows < 13)\r
- {\r
- result.create(image.size(), CV_32F);\r
+ buf.create(image.size(), templ.size());\r
+ result.create(buf.result_size, CV_32F);\r
\r
- GpuMat& contKernel = buf.templ_block;\r
+ Size& block_size = buf.block_size;\r
+ Size& dft_size = buf.dft_size;\r
\r
- if (templ.isContinuous())\r
- contKernel = templ;\r
- else\r
- {\r
- contKernel = createContinuous(templ.size(), templ.type());\r
+ GpuMat& image_block = buf.image_block;\r
+ GpuMat& templ_block = buf.templ_block;\r
+ GpuMat& result_data = buf.result_data;\r
\r
- if (stream)\r
- stream.enqueueCopy(templ, contKernel);\r
- else\r
- templ.copyTo(contKernel);\r
- }\r
-\r
- convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));\r
- }\r
- else\r
- {\r
- buf.create(image.size(), templ.size());\r
- result.create(buf.result_size, CV_32F);\r
+ GpuMat& image_spect = buf.image_spect;\r
+ GpuMat& templ_spect = buf.templ_spect;\r
+ GpuMat& result_spect = buf.result_spect;\r
\r
- Size& block_size = buf.block_size;\r
- Size& dft_size = buf.dft_size;\r
+ cufftHandle planR2C, planC2R;\r
+ cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
+ cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
\r
- GpuMat& image_block = buf.image_block;\r
- GpuMat& templ_block = buf.templ_block;\r
- GpuMat& result_data = buf.result_data;\r
+ cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );\r
+ cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );\r
\r
- GpuMat& image_spect = buf.image_spect;\r
- GpuMat& templ_spect = buf.templ_spect;\r
- GpuMat& result_spect = buf.result_spect;\r
+ GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
+ copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
+ templ_block.cols - templ_roi.cols, 0, Scalar(), stream);\r
\r
- cufftHandle planR2C, planC2R;\r
- cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
- cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
+ cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
+ templ_spect.ptr<cufftComplex>()));\r
\r
- cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) );\r
- cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) );\r
-\r
- GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
- copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
- templ_block.cols - templ_roi.cols, 0, Scalar(), stream);\r
-\r
- cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
- templ_spect.ptr<cufftComplex>()));\r
-\r
- // Process all blocks of the result matrix\r
- for (int y = 0; y < result.rows; y += block_size.height)\r
+ // Process all blocks of the result matrix\r
+ for (int y = 0; y < result.rows; y += block_size.height)\r
+ {\r
+ for (int x = 0; x < result.cols; x += block_size.width)\r
{\r
- for (int x = 0; x < result.cols; x += block_size.width)\r
- {\r
- Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
- std::min(y + dft_size.height, image.rows) - y);\r
- GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
- image.step);\r
- copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
- 0, image_block.cols - image_roi.cols, 0, Scalar(), stream);\r
-\r
- cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
- image_spect.ptr<cufftComplex>()));\r
- mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
- 1.f / dft_size.area(), ccorr, stream);\r
- cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
- result_data.ptr<cufftReal>()));\r
-\r
- Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
- std::min(y + block_size.height, result.rows) - y);\r
- GpuMat result_roi(result_roi_size, result.type(), \r
- (void*)(result.ptr<float>(y) + x), result.step);\r
- GpuMat result_block(result_roi_size, result_data.type(), \r
- result_data.ptr(), result_data.step);\r
-\r
- if (stream)\r
- stream.enqueueCopy(result_block, result_roi);\r
- else\r
- result_block.copyTo(result_roi);\r
- }\r
- }\r
+ Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,\r
+ std::min(y + dft_size.height, image.rows) - y);\r
+ GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), \r
+ image.step);\r
+ copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,\r
+ 0, image_block.cols - image_roi.cols, 0, Scalar(), stream);\r
+\r
+ cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
+ image_spect.ptr<cufftComplex>()));\r
+ mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
+ 1.f / dft_size.area(), ccorr, stream);\r
+ cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
+ result_data.ptr<cufftReal>()));\r
+\r
+ Size result_roi_size(std::min(x + block_size.width, result.cols) - x,\r
+ std::min(y + block_size.height, result.rows) - y);\r
+ GpuMat result_roi(result_roi_size, result.type(), \r
+ (void*)(result.ptr<float>(y) + x), result.step);\r
+ GpuMat result_block(result_roi_size, result_data.type(), \r
+ result_data.ptr(), result_data.step);\r
\r
- cufftSafeCall(cufftDestroy(planR2C));\r
- cufftSafeCall(cufftDestroy(planC2R));\r
+ if (stream)\r
+ stream.enqueueCopy(result_block, result_roi);\r
+ else\r
+ result_block.copyTo(result_roi);\r
+ }\r
}\r
\r
+ cufftSafeCall(cufftDestroy(planR2C));\r
+ cufftSafeCall(cufftDestroy(planC2R));\r
#endif\r
}\r
\r
Values((int)cv::MORPH_OPEN, (int)cv::MORPH_CLOSE, (int)cv::MORPH_GRADIENT, (int)cv::MORPH_TOPHAT, (int)cv::MORPH_BLACKHAT),\r
USE_ROI));\r
\r
+/////////////////////////////////////////////////////////////////////////////////////////////////\r
+// filter2D\r
+\r
+PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, int, UseRoi)\r
+{\r
+ cv::gpu::DeviceInfo devInfo;\r
+ int ksize;\r
+ bool useRoi;\r
+\r
+ cv::Mat img;\r
+ cv::Mat kernel;\r
+ \r
+ virtual void SetUp()\r
+ {\r
+ devInfo = GET_PARAM(0);\r
+ ksize = GET_PARAM(1);\r
+ useRoi = GET_PARAM(2);\r
+\r
+ cv::gpu::setDevice(devInfo.deviceID());\r
+ \r
+ img = readImage("stereobp/aloe-L.png");\r
+ ASSERT_FALSE(img.empty());\r
+\r
+ kernel = cv::Mat::ones(ksize, ksize, CV_32FC1);\r
+ }\r
+};\r
+\r
+TEST_P(Filter2D, Rgba)\r
+{\r
+ cv::Mat src;\r
+ cv::cvtColor(img, src, CV_BGR2BGRA);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);\r
+\r
+ cv::Mat dst;\r
+\r
+ cv::gpu::GpuMat dev_dst;\r
+\r
+ cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);\r
+\r
+ dev_dst.download(dst);\r
+\r
+ EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 0.0);\r
+}\r
+\r
+TEST_P(Filter2D, Gray)\r
+{\r
+ cv::Mat src;\r
+ cv::cvtColor(img, src, CV_BGR2GRAY);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);\r
+\r
+ cv::Mat dst;\r
+\r
+ cv::gpu::GpuMat dev_dst;\r
+\r
+ cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);\r
+\r
+ dev_dst.download(dst);\r
+\r
+ EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 0.0);\r
+}\r
+\r
+TEST_P(Filter2D, 32FC1)\r
+{\r
+ cv::Mat src;\r
+ cv::cvtColor(img, src, CV_BGR2GRAY);\r
+ src.convertTo(src, CV_32F, 1.0 / 255.0);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::filter2D(src, dst_gold, -1, kernel, cv::Point(-1, -1), 0, cv::BORDER_CONSTANT);\r
+\r
+ cv::Mat dst;\r
+\r
+ cv::gpu::GpuMat dev_dst;\r
+\r
+ cv::gpu::filter2D(loadMat(src, useRoi), dev_dst, -1, kernel);\r
+\r
+ dev_dst.download(dst);\r
+\r
+ EXPECT_MAT_NEAR_KSIZE(dst_gold, dst, ksize, 1e-3);\r
+}\r
+\r
+INSTANTIATE_TEST_CASE_P(Filter, Filter2D, Combine(\r
+ ALL_DEVICES,\r
+ Values(3, 5, 7, 11, 13, 15),\r
+ USE_ROI));\r
+\r
#endif // HAVE_CUDA\r
///////////////////////////////////////////////////////////////////////////////////////////////////////\r
// cornerHarris\r
\r
-PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border)\r
+PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border, int, int)\r
{\r
cv::gpu::DeviceInfo devInfo;\r
int type;\r
int borderType;\r
+ int blockSize;\r
+ int apertureSize;\r
\r
cv::Mat src;\r
- int blockSize;\r
- int apertureSize; \r
double k;\r
\r
cv::Mat dst_gold;\r
- \r
+\r
virtual void SetUp()\r
{\r
devInfo = GET_PARAM(0);\r
type = GET_PARAM(1);\r
borderType = GET_PARAM(2);\r
+ blockSize = GET_PARAM(3);\r
+ apertureSize = GET_PARAM(4); \r
\r
cv::gpu::setDevice(devInfo.deviceID());\r
- \r
+\r
cv::RNG& rng = TS::ptr()->get_rng();\r
- \r
+\r
cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);\r
ASSERT_FALSE(img.empty());\r
- \r
+\r
img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);\r
- \r
- blockSize = 1 + rng.next() % 5;\r
- apertureSize = 1 + 2 * (rng.next() % 4); \r
+\r
k = rng.uniform(0.1, 0.9);\r
\r
cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);\r
TEST_P(CornerHarris, Accuracy)\r
{\r
cv::Mat dst;\r
- \r
+\r
cv::gpu::GpuMat dev_dst;\r
\r
cv::gpu::cornerHarris(loadMat(src), dev_dst, blockSize, apertureSize, k, borderType);\r
INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine(\r
ALL_DEVICES, \r
Values(CV_8UC1, CV_32FC1), \r
- Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));\r
+ Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT),\r
+ Values(3, 5, 7),\r
+ Values(0, 3, 5, 7)));\r
\r
///////////////////////////////////////////////////////////////////////////////////////////////////////\r
// cornerMinEigen\r
\r
-PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border)\r
+PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border, int, int)\r
{\r
cv::gpu::DeviceInfo devInfo;\r
int type;\r
int borderType;\r
-\r
- cv::Mat src;\r
int blockSize;\r
int apertureSize;\r
\r
+ cv::Mat src;\r
+\r
cv::Mat dst_gold;\r
\r
virtual void SetUp()\r
devInfo = GET_PARAM(0);\r
type = GET_PARAM(1);\r
borderType = GET_PARAM(2);\r
+ blockSize = GET_PARAM(3);\r
+ apertureSize = GET_PARAM(4); \r
+\r
+ cv::gpu::setDevice(devInfo.deviceID());\r
\r
- cv::gpu::setDevice(devInfo.deviceID()); \r
- \r
cv::RNG& rng = TS::ptr()->get_rng();\r
- \r
+\r
cv::Mat img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);\r
ASSERT_FALSE(img.empty());\r
\r
img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);\r
- \r
- blockSize = 1 + rng.next() % 5;\r
- apertureSize = 1 + 2 * (rng.next() % 4);\r
\r
cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);\r
}\r
TEST_P(CornerMinEigen, Accuracy)\r
{\r
cv::Mat dst;\r
- \r
+\r
cv::gpu::GpuMat dev_dst;\r
\r
cv::gpu::cornerMinEigenVal(loadMat(src), dev_dst, blockSize, apertureSize, borderType);\r
INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, Combine(\r
ALL_DEVICES, \r
Values(CV_8UC1, CV_32FC1), \r
- Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));\r
+ Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT),\r
+ Values(3, 5, 7),\r
+ Values(0, 3, 5, 7)));\r
\r
////////////////////////////////////////////////////////////////////////\r
// ColumnSum\r
////////////////////////////////////////////////////////\r
// convolve\r
\r
-PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int)\r
+namespace\r
+{\r
+ void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)\r
+ {\r
+ // reallocate the output array if needed\r
+ C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());\r
+ Size dftSize;\r
+\r
+ // compute the size of DFT transform\r
+ dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);\r
+ dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);\r
+\r
+ // allocate temporary buffers and initialize them with 0\92s\r
+ cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));\r
+ cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));\r
+\r
+ // copy A and B to the top-left corners of tempA and tempB, respectively\r
+ cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));\r
+ A.copyTo(roiA);\r
+ cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));\r
+ B.copyTo(roiB);\r
+\r
+ // now transform the padded A & B in-place;\r
+ // use "nonzeroRows" hint for faster processing\r
+ cv::dft(tempA, tempA, 0, A.rows);\r
+ cv::dft(tempB, tempB, 0, B.rows);\r
+\r
+ // multiply the spectrums;\r
+ // the function handles packed spectrum representations well\r
+ cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);\r
+\r
+ // transform the product back from the frequency domain.\r
+ // Even though all the result rows will be non-zero,\r
+ // you need only the first C.rows of them, and thus you\r
+ // pass nonzeroRows == C.rows\r
+ cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);\r
+\r
+ // now copy the result back to C.\r
+ tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);\r
+ }\r
+}\r
+\r
+PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, int, bool)\r
{ \r
cv::gpu::DeviceInfo devInfo;\r
int ksize;\r
+ bool ccorr;\r
\r
- cv::Size size; \r
cv::Mat src;\r
cv::Mat kernel;\r
\r
{\r
devInfo = GET_PARAM(0);\r
ksize = GET_PARAM(1);\r
+ ccorr = GET_PARAM(2);\r
\r
cv::gpu::setDevice(devInfo.deviceID());\r
\r
cv::RNG& rng = TS::ptr()->get_rng();\r
\r
- size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));\r
+ cv::Size size(rng.uniform(200, 400), rng.uniform(200, 400));\r
\r
- src = randomMat(rng, size, CV_32FC1, 0.0, 255.0, false);\r
+ src = randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);\r
kernel = randomMat(rng, cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0, false);\r
\r
- cv::filter2D(src, dst_gold, CV_32F, kernel, cv::Point(-1, -1), 0, cv::BORDER_REPLICATE);\r
+ convolveDFT(src, kernel, dst_gold, ccorr);\r
}\r
};\r
\r
TEST_P(Convolve, Accuracy)\r
-{ \r
+{\r
cv::Mat dst;\r
\r
cv::gpu::GpuMat d_dst;\r
\r
- cv::gpu::convolve(loadMat(src), loadMat(kernel), d_dst);\r
+ cv::gpu::convolve(loadMat(src), loadMat(kernel), d_dst, ccorr);\r
\r
d_dst.download(dst);\r
\r
- EXPECT_MAT_NEAR(dst, dst_gold, 1e-2);\r
+ EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);\r
}\r
\r
\r
INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, Combine(\r
ALL_DEVICES, \r
- Values(3, 5, 7, 9, 11)));\r
+ Values(3, 7, 11, 17, 19, 23, 45),\r
+ Bool()));\r
\r
#endif // HAVE_CUDA\r