From 1ecb6cf7750a780d8fa2053fc17fff06e112ecba Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 27 Dec 2010 09:10:22 +0000 Subject: [PATCH] simplified gpu::columnSum test, it doesn't fail on Quadro anymore (when seed is 000001af5a11badd) after BFM test, but something definitely wrong with NPP_Staging's transpose --- modules/gpu/src/imgproc_gpu.cpp | 30 ++++++++++++++-------------- tests/gpu/src/gputest_main.cpp | 1 - tests/gpu/src/imgproc_gpu.cpp | 44 ++++++++++++++++++++++------------------- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 6022bec..bf8e7f6 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -1156,25 +1156,25 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags) if (src_data.data != src.data) src.copyTo(src_data); - Size dft_size_ = dft_size; + Size dft_size_opt = dft_size; if (is_1d_input && !is_row_dft) { // If the source matrix is single column handle it as single row - dft_size_.width = std::max(dft_size.width, dft_size.height); - dft_size_.height = std::min(dft_size.width, dft_size.height); + dft_size_opt.width = std::max(dft_size.width, dft_size.height); + dft_size_opt.height = std::min(dft_size.width, dft_size.height); } cufftType dft_type = CUFFT_R2C; if (is_complex_input) dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; - CV_Assert(dft_size_.width > 1); + CV_Assert(dft_size_opt.width > 1); cufftHandle plan; if (is_1d_input || is_row_dft) - cufftPlan1d(&plan, dft_size_.width, dft_type, dft_size_.height); + cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height); else - cufftPlan2d(&plan, dft_size_.height, dft_size_.width, dft_type); + cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type); if (is_complex_input) { @@ -1194,7 +1194,8 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags) } else { - if (dft_size == dft_size_) + // We could swap dft_size for efficiency. Here we must reflect it + if (dft_size == dft_size_opt) createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst); else createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst); @@ -1206,7 +1207,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags) cufftSafeCall(cufftDestroy(plan)); if (is_scaled_dft) - multiply(dst, Scalar::all(1. / (dft_size.area())), dst); + multiply(dst, Scalar::all(1. / dft_size.area()), dst); } ////////////////////////////////////////////////////////////////////////////// @@ -1260,18 +1261,19 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols); block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows); - GpuMat result_data = createContinuous(dft_size, CV_32F); - int spect_len = dft_size.height * (dft_size.width / 2 + 1); - GpuMat image_spect(1, spect_len, CV_32FC2); - GpuMat templ_spect(1, spect_len, CV_32FC2); - GpuMat result_spect(1, spect_len, CV_32FC2); + GpuMat image_spect = createContinuous(1, spect_len, CV_32FC2); + GpuMat templ_spect = createContinuous(1, spect_len, CV_32FC2); + GpuMat result_spect = createContinuous(1, spect_len, CV_32FC2); cufftHandle planR2C, planC2R; cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R)); cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C)); + GpuMat image_block = createContinuous(dft_size, CV_32F); GpuMat templ_block = createContinuous(dft_size, CV_32F); + GpuMat result_data = createContinuous(dft_size, CV_32F); + GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step); copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, templ_block.cols - templ_roi.cols, 0); @@ -1279,8 +1281,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr(), templ_spect.ptr())); - GpuMat image_block = createContinuous(dft_size, CV_32F); - // Process all blocks of the result matrix for (int y = 0; y < result.rows; y += block_size.height) { diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp index 366d2ee..cbeb0d0 100644 --- a/tests/gpu/src/gputest_main.cpp +++ b/tests/gpu/src/gputest_main.cpp @@ -47,7 +47,6 @@ const char* blacklist[] = { "GPU-AsyncGpuMatOperator", // crash "GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR - "GPU-BruteForceMatcher", // often crashes when seed=000001af5a11badd 0 }; diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index 9557b88..7766d80 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -768,34 +768,37 @@ struct CV_GpuColumnSumTest: CvTest { try { - int n = 375; - int m = 1072; - Mat src(n, m, CV_32F); + int cols = 375; + int rows = 1072; + + Mat src(rows, cols, CV_32F); RNG rng(1); rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1)); - Mat dst_gold, dst2_gold; - - integral(src, dst_gold, dst2_gold); - GpuMat dsrc(src); - GpuMat buf; - GpuMat dst; - columnSum(dsrc, buf); - transpose(buf, dst); - columnSum(dst, buf); - transpose(buf, dst); + GpuMat d_dst; + columnSum(GpuMat(src), d_dst); - Mat dst_ = dst; - for (int i = 0; i < dst_.rows; ++i) + Mat dst = d_dst; + for (int j = 0; j < src.cols; ++j) + { + float a = src.at(0, j); + float b = dst.at(0, j); + if (fabs(a - b) > 0.5f) + { + ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", 0, j, a, b); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return; + } + } + for (int i = 1; i < src.rows; ++i) { - const double* dst_gold_data = (const double*)dst_gold.ptr(i + 1); - for (int j = 0; j < dst_.cols; ++j) + for (int j = 0; j < src.cols; ++j) { - float a = (float)dst_gold_data[j + 1]; - float b = dst_.at(i, j); + float a = src.at(i, j) += src.at(i - 1, j); + float b = dst.at(i, j); if (fabs(a - b) > 0.5f) { - ts->printf(CvTS::CONSOLE, "%d %d %f %f\n", i, j, a, b); + ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", i, j, a, b); ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); return; } @@ -804,6 +807,7 @@ struct CV_GpuColumnSumTest: CvTest } catch (const Exception& e) { + ts->printf(CvTS::CONSOLE, e.what()); if (!check_and_treat_gpu_exception(e, ts)) throw; return; } -- 2.7.4