// We don't support real-to-real transform\r
CV_Assert(is_complex_input || is_complex_output);\r
\r
- GpuMat src_data, src_aux;\r
+ GpuMat src_data;\r
\r
// Make sure here we work with the continuous input, \r
// as CUFFT can't handle gaps\r
- if (src.isContinuous())\r
- src_data = src_aux = src;\r
- else\r
- {\r
- src_data = GpuMat(1, src.size().area(), src.type());\r
- src_aux = GpuMat(src.rows, src.cols, src.type(), src_data.ptr(),\r
- src.cols * src.elemSize());\r
- src.copyTo(src_aux);\r
+ src_data = src;\r
+ createContinuous(src.rows, src.cols, src.type(), src_data);\r
+ if (src_data.data != src.data)\r
+ src.copyTo(src_data);\r
\r
- if (is_1d_input && !is_row_dft)\r
- {\r
- // If the source matrix is the single column\r
- // reshape it into single row\r
- int rows = std::min(src.rows, src.cols);\r
- int cols = src.size().area() / rows;\r
- src_aux = GpuMat(rows, cols, src.type(), src_data.ptr(), \r
- cols * src.elemSize());\r
- }\r
- }\r
+ if (is_1d_input && !is_row_dft)\r
+ // If the source matrix is single column reshape it into single row\r
+ src_data = src_data.reshape(0, std::min(src.rows, src.cols));\r
\r
cufftType dft_type = CUFFT_R2C;\r
if (is_complex_input) \r
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
\r
- int dft_rows = src_aux.rows;\r
- int dft_cols = src_aux.cols;\r
+ int dft_rows = src_data.rows;\r
+ int dft_cols = src_data.cols;\r
if (is_complex_input && !is_complex_output)\r
- dft_cols = (src_aux.cols - 1) * 2 + (int)odd;\r
+ dft_cols = (src_data.cols - 1) * 2 + (int)odd;\r
CV_Assert(dft_cols > 1);\r
\r
cufftHandle plan;\r
else\r
cufftPlan2d(&plan, dft_rows, dft_cols, dft_type);\r
\r
- GpuMat dst_data, dst_aux;\r
int dst_cols, dst_rows;\r
- bool is_dst_mem_good;\r
\r
if (is_complex_input)\r
{\r
if (is_complex_output)\r
{\r
- is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 \r
- && dst.cols >= src.cols && dst.rows >= src.rows;\r
-\r
- if (is_dst_mem_good)\r
- dst_data = dst;\r
- else\r
- {\r
- dst_data.create(1, src.size().area(), CV_32FC2);\r
- dst_aux = GpuMat(src.rows, src.cols, dst_data.type(), dst_data.ptr(),\r
- src.cols * dst_data.elemSize());\r
- }\r
-\r
+ createContinuous(src.rows, src.cols, CV_32FC2, dst);\r
cufftSafeCall(cufftExecC2C(\r
- plan, src_data.ptr<cufftComplex>(),\r
- dst_data.ptr<cufftComplex>(),\r
+ plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),\r
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
-\r
- if (!is_dst_mem_good)\r
- {\r
- dst.create(dst_aux.size(), dst_aux.type());\r
- dst_aux.copyTo(dst);\r
- }\r
}\r
else\r
{\r
dst_rows = src.rows;\r
dst_cols = (src.cols - 1) * 2 + (int)odd;\r
- if (src_aux.size() != src.size())\r
+ if (src_data.size() != src.size())\r
{\r
dst_rows = (src.rows - 1) * 2 + (int)odd;\r
dst_cols = src.cols;\r
}\r
\r
- is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32F\r
- && dst.cols >= dst_cols && dst.rows >= dst_rows;\r
-\r
- if (is_dst_mem_good)\r
- dst_data = dst;\r
- else\r
- {\r
- dst_data.create(1, dst_rows * dst_cols, CV_32F);\r
- dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), \r
- dst_cols * dst_data.elemSize());\r
- }\r
-\r
+ createContinuous(dst_rows, dst_cols, CV_32F, dst);\r
cufftSafeCall(cufftExecC2R(\r
- plan, src_data.ptr<cufftComplex>(), dst_data.ptr<cufftReal>()));\r
-\r
- if (!is_dst_mem_good)\r
- {\r
- dst.create(dst_aux.size(), dst_aux.type());\r
- dst_aux.copyTo(dst);\r
- }\r
+ plan, src_data.ptr<cufftComplex>(), dst.ptr<cufftReal>()));\r
}\r
}\r
else\r
{\r
dst_rows = src.rows;\r
dst_cols = src.cols / 2 + 1;\r
- if (src_aux.size() != src.size())\r
+ if (src_data.size() != src.size())\r
{\r
dst_rows = src.rows / 2 + 1;\r
dst_cols = src.cols;\r
}\r
\r
- is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 \r
- && dst.cols >= dst_cols && dst.rows >= dst_rows;\r
-\r
- if (is_dst_mem_good)\r
- dst_data = dst;\r
- else\r
- {\r
- dst_data.create(1, dst_rows * dst_cols, CV_32FC2);\r
- dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), \r
- dst_cols * dst_data.elemSize());\r
- }\r
-\r
+ createContinuous(dst_rows, dst_cols, CV_32FC2, dst);\r
cufftSafeCall(cufftExecR2C(\r
- plan, src_data.ptr<cufftReal>(), dst_data.ptr<cufftComplex>()));\r
-\r
- if (!is_dst_mem_good)\r
- {\r
- dst.create(dst_aux.size(), dst_aux.type());\r
- dst_aux.copyTo(dst);\r
- }\r
+ plan, src_data.ptr<cufftReal>(), dst.ptr<cufftComplex>()));\r
}\r
\r
cufftSafeCall(cufftDestroy(plan));\r
block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols);\r
block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows);\r
\r
- GpuMat image_data(1, dft_size.area(), CV_32F);\r
- GpuMat templ_data(1, dft_size.area(), CV_32F);\r
- GpuMat result_data(1, dft_size.area(), CV_32F);\r
+ GpuMat result_data = createContinuous(dft_size, CV_32F);\r
\r
int spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
- GpuMat image_spect(1, spect_len, CV_32FC2);\r
- GpuMat templ_spect(1, spect_len, CV_32FC2);\r
- GpuMat result_spect(1, spect_len, CV_32FC2);\r
+ GpuMat image_spect = createContinuous(1, spect_len, CV_32FC2);\r
+ GpuMat templ_spect = createContinuous(1, spect_len, CV_32FC2);\r
+ GpuMat result_spect = createContinuous(1, spect_len, CV_32FC2);\r
\r
cufftHandle planR2C, planC2R;\r
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
\r
+ GpuMat templ_block = createContinuous(dft_size, CV_32F);\r
GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
- GpuMat templ_block(dft_size, CV_32F, templ_data.ptr(), dft_size.width * sizeof(cufftReal));\r
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
templ_block.cols - templ_roi.cols, 0);\r
\r
- cufftSafeCall(cufftExecR2C(planR2C, templ_data.ptr<cufftReal>(), \r
+ cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
templ_spect.ptr<cufftComplex>()));\r
\r
- GpuMat image_block(dft_size, CV_32F, image_data.ptr(), dft_size.width * sizeof(cufftReal));\r
+ GpuMat image_block = createContinuous(dft_size, CV_32F);\r
\r
// Process all blocks of the result matrix\r
for (int y = 0; y < result.rows; y += block_size.height)\r
// Locate ROI in the source matrix\r
GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), image.step);\r
\r
- // Make source image block continous\r
+ // Make source image block is continuous\r
copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, 0, \r
image_block.cols - image_roi.cols, 0);\r
\r
- cufftSafeCall(cufftExecR2C(planR2C, image_data.ptr<cufftReal>(), \r
+ cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(), \r
image_spect.ptr<cufftComplex>()));\r
\r
mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,\r
- 1.f / dft_size.area(), ccorr);\r
+ 1.f / dft_size.area(), ccorr);\r
\r
cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
result_data.ptr<cufftReal>()));\r
result_roi_size.width = std::min(x + block_size.width, result.cols) - x;\r
result_roi_size.height = std::min(y + block_size.height, result.rows) - y;\r
\r
- GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr<float>(y) + x), result.step);\r
- GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal));\r
+ GpuMat result_roi(result_roi_size, result.type(), (void*)(result.ptr<float>(y) + x), result.step);\r
+ GpuMat result_block(result_roi_size, result_data.type(), result_data.ptr(), result_data.step);\r
\r
- // Copy result block into appropriate part of the result matrix.\r
- // We can't compute it inplace as the result of the CUFFT transforms\r
- // is always continous, while the result matrix and its blocks can have gaps.\r
+ // Copy block into appropriate part of the result matrix\r
result_block.copyTo(result_roi);\r
}\r
}\r