if (src_data.data != src.data)\r
src.copyTo(src_data);\r
\r
- Size dft_size_ = dft_size;\r
+ Size dft_size_opt = dft_size;\r
if (is_1d_input && !is_row_dft)\r
{\r
// If the source matrix is single column handle it as single row\r
- dft_size_.width = std::max(dft_size.width, dft_size.height);\r
- dft_size_.height = std::min(dft_size.width, dft_size.height);\r
+ dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
+ dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
}\r
\r
cufftType dft_type = CUFFT_R2C;\r
if (is_complex_input) \r
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
\r
- CV_Assert(dft_size_.width > 1);\r
+ CV_Assert(dft_size_opt.width > 1);\r
\r
cufftHandle plan;\r
if (is_1d_input || is_row_dft)\r
- cufftPlan1d(&plan, dft_size_.width, dft_type, dft_size_.height);\r
+ cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
else\r
- cufftPlan2d(&plan, dft_size_.height, dft_size_.width, dft_type);\r
+ cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
\r
if (is_complex_input)\r
{\r
}\r
else\r
{\r
- if (dft_size == dft_size_)\r
+ // dft_size_opt may hold swapped dimensions (done above for efficiency); the destination size must reflect that\r
+ if (dft_size == dft_size_opt)\r
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
else\r
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
cufftSafeCall(cufftDestroy(plan));\r
\r
if (is_scaled_dft)\r
- multiply(dst, Scalar::all(1. / (dft_size.area())), dst);\r
+ multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
}\r
\r
//////////////////////////////////////////////////////////////////////////////\r
block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols);\r
block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows);\r
\r
- GpuMat result_data = createContinuous(dft_size, CV_32F);\r
-\r
int spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
- GpuMat image_spect(1, spect_len, CV_32FC2);\r
- GpuMat templ_spect(1, spect_len, CV_32FC2);\r
- GpuMat result_spect(1, spect_len, CV_32FC2);\r
+ GpuMat image_spect = createContinuous(1, spect_len, CV_32FC2);\r
+ GpuMat templ_spect = createContinuous(1, spect_len, CV_32FC2);\r
+ GpuMat result_spect = createContinuous(1, spect_len, CV_32FC2);\r
\r
cufftHandle planR2C, planC2R;\r
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
\r
+ GpuMat image_block = createContinuous(dft_size, CV_32F);\r
GpuMat templ_block = createContinuous(dft_size, CV_32F);\r
+ GpuMat result_data = createContinuous(dft_size, CV_32F);\r
+\r
GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
templ_block.cols - templ_roi.cols, 0);\r
cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
templ_spect.ptr<cufftComplex>()));\r
\r
- GpuMat image_block = createContinuous(dft_size, CV_32F);\r
-\r
// Process all blocks of the result matrix\r
for (int y = 0; y < result.rows; y += block_size.height)\r
{\r
{\r
try\r
{\r
- int n = 375;\r
- int m = 1072;\r
- Mat src(n, m, CV_32F);\r
+ int cols = 375;\r
+ int rows = 1072;\r
+\r
+ Mat src(rows, cols, CV_32F);\r
RNG rng(1);\r
rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1));\r
- Mat dst_gold, dst2_gold;\r
-\r
- integral(src, dst_gold, dst2_gold);\r
\r
- GpuMat dsrc(src);\r
- GpuMat buf;\r
- GpuMat dst;\r
- columnSum(dsrc, buf);\r
- transpose(buf, dst);\r
- columnSum(dst, buf);\r
- transpose(buf, dst);\r
+ GpuMat d_dst;\r
+ columnSum(GpuMat(src), d_dst);\r
\r
- Mat dst_ = dst;\r
- for (int i = 0; i < dst_.rows; ++i)\r
+ Mat dst = d_dst;\r
+ for (int j = 0; j < src.cols; ++j)\r
+ {\r
+ float a = src.at<float>(0, j);\r
+ float b = dst.at<float>(0, j);\r
+ if (fabs(a - b) > 0.5f)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", 0, j, a, b);\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return;\r
+ }\r
+ }\r
+ for (int i = 1; i < src.rows; ++i)\r
{\r
- const double* dst_gold_data = (const double*)dst_gold.ptr(i + 1);\r
- for (int j = 0; j < dst_.cols; ++j)\r
+ for (int j = 0; j < src.cols; ++j)\r
{\r
- float a = (float)dst_gold_data[j + 1];\r
- float b = dst_.at<float>(i, j);\r
+ float a = src.at<float>(i, j) += src.at<float>(i - 1, j);\r
+ float b = dst.at<float>(i, j);\r
if (fabs(a - b) > 0.5f)\r
{\r
- ts->printf(CvTS::CONSOLE, "%d %d %f %f\n", i, j, a, b);\r
+ ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", i, j, a, b);\r
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
return;\r
}\r
}\r
catch (const Exception& e)\r
{\r
+ // Print the exception text through an explicit "%s": e.what() is data, not a format string,\r
+ // so any '%' it contains must not be interpreted as a conversion specifier.\r
+ ts->printf(CvTS::CONSOLE, "%s", e.what());\r
if (!check_and_treat_gpu_exception(e, ts)) throw;\r
return;\r
}\r