void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }\r
void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); }\r
void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); }\r
+void cv::gpu::dft(const GpuMat&, GpuMat&, int, int, bool) { throw_nogpu(); }\r
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
\r
\r
}\r
\r
//////////////////////////////////////////////////////////////////////////////\r
+// dft\r
+\r
+void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bool odd)\r
+{\r
+ CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);\r
+\r
+ // We don't support unpacked output (in the case of real input)\r
+ CV_Assert(!(flags & DFT_COMPLEX_OUTPUT));\r
+\r
+ bool is_1d_input = (src.rows == 1) || (src.cols == 1);\r
+ int is_row_dft = flags & DFT_ROWS;\r
+ int is_scaled_dft = flags & DFT_SCALE;\r
+ int is_inverse = flags & DFT_INVERSE;\r
+ bool is_complex_input = src.channels() == 2;\r
+ bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
+\r
+ // We don't support scaled transform\r
+ CV_Assert(!is_scaled_dft);\r
+\r
+ // We don't support real-to-real transform\r
+ CV_Assert(is_complex_input || is_complex_output);\r
+\r
+ GpuMat src_data, src_aux;\r
+\r
+ // Make sure here we work with the continuous input, \r
+ // as CUFFT can't handle gaps\r
+ if (src.isContinuous())\r
+ src_aux = src;\r
+ else\r
+ {\r
+ src_data = GpuMat(1, src.size().area(), src.type());\r
+ src_aux = GpuMat(src.rows, src.cols, src.type(), src_data.ptr(), src.cols * src.elemSize());\r
+ src.copyTo(src_aux);\r
+\r
+ if (is_1d_input && !is_row_dft)\r
+ {\r
+ // If the source matrix is the single column\r
+ // reshape it into single row\r
+ int rows = std::min(src.rows, src.cols);\r
+ int cols = src.size().area() / rows;\r
+ src_aux = GpuMat(rows, cols, src.type(), src_data.ptr(), cols * src.elemSize());\r
+ }\r
+ }\r
+\r
+ cufftType dft_type = CUFFT_R2C;\r
+ if (is_complex_input) \r
+ dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
+\r
+ int dft_cols = src_aux.cols;\r
+ if (is_complex_input && !is_complex_output)\r
+ dft_cols = (src_aux.cols - 1) * 2 + (int)odd;\r
+ CV_Assert(dft_cols > 1);\r
+\r
+ cufftHandle plan;\r
+ if (is_1d_input || is_row_dft)\r
+ cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows);\r
+ else\r
+ cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type);\r
+\r
+ GpuMat dst_data, dst_aux;\r
+ int dst_cols, dst_rows;\r
+ bool is_dst_mem_good;\r
+\r
+ if (is_complex_input)\r
+ {\r
+ if (is_complex_output)\r
+ {\r
+ is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 \r
+ && dst.size().area() >= src.size().area();\r
+\r
+ if (is_dst_mem_good)\r
+ dst_data = dst;\r
+ else\r
+ {\r
+ dst_data.create(1, src.size().area(), CV_32FC2);\r
+ dst_aux = GpuMat(src.rows, src.cols, dst_data.type(), dst_data.ptr(),\r
+ src.cols * dst_data.elemSize());\r
+ }\r
+\r
+ cufftSafeCall(cufftExecC2C(\r
+ plan, src_data.ptr<cufftComplex>(),\r
+ dst_data.ptr<cufftComplex>(),\r
+ is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));\r
+\r
+ if (!is_dst_mem_good)\r
+ {\r
+ dst.create(dst_aux.size(), dst_aux.type());\r
+ dst_aux.copyTo(dst);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ dst_rows = src.rows;\r
+ dst_cols = (src.cols - 1) * 2 + (int)odd;\r
+ if (src_aux.size() != src.size())\r
+ {\r
+ dst_rows = (src.rows - 1) * 2 + (int)odd;\r
+ dst_cols = src.cols;\r
+ }\r
+\r
+ is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32F\r
+ && dst.rows >= dst_rows && dst.cols >= dst_cols;\r
+\r
+ if (is_dst_mem_good)\r
+ dst_data = dst;\r
+ else\r
+ {\r
+ dst_data.create(1, dst_rows * dst_cols, CV_32F);\r
+ dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), \r
+ dst_cols * dst_data.elemSize());\r
+ }\r
+\r
+ cufftSafeCall(cufftExecC2R(\r
+ plan, src_data.ptr<cufftComplex>(), dst_data.ptr<cufftReal>()));\r
+\r
+ if (!is_dst_mem_good)\r
+ {\r
+ dst.create(dst_aux.size(), dst_aux.type());\r
+ dst_aux.copyTo(dst);\r
+ }\r
+ }\r
+ }\r
+ else\r
+ {\r
+ dst_rows = src.rows;\r
+ dst_cols = src.cols / 2 + 1;\r
+ if (src_aux.size() != src.size())\r
+ {\r
+ dst_rows = src.rows / 2 + 1;\r
+ dst_cols = src.cols;\r
+ }\r
+\r
+ is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 \r
+ && dst.rows >= dst_rows && dst.cols >= dst_cols;\r
+\r
+ if (is_dst_mem_good)\r
+ dst_data = dst;\r
+ else\r
+ {\r
+ dst_data.create(1, dst_rows * dst_cols, CV_32FC2);\r
+ dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), \r
+ dst_cols * dst_data.elemSize());\r
+ }\r
+\r
+ cufftSafeCall(cufftExecR2C(\r
+ plan, src_data.ptr<cufftReal>(), dst_data.ptr<cufftComplex>()));\r
+\r
+ if (!is_dst_mem_good)\r
+ {\r
+ dst.create(dst_aux.size(), dst_aux.type());\r
+ dst_aux.copyTo(dst);\r
+ }\r
+ }\r
+\r
+ cufftSafeCall(cufftDestroy(plan));\r
+}\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
// crossCorr\r
\r
namespace \r
{\r
try\r
{\r
- if (!test(1 + rand() % 100, 1 + rand() % 1000)) return;\r
- if (!testConj(1 + rand() % 100, 1 + rand() % 1000)) return;\r
- if (!testScaled(1 + rand() % 100, 1 + rand() % 1000)) return;\r
- if (!testScaledConj(1 + rand() % 100, 1 + rand() % 1000)) return;\r
+ test(0);\r
+ testConj(0);\r
+ testScaled(0);\r
+ testScaledConj(0);\r
+ test(DFT_ROWS);\r
+ testConj(DFT_ROWS);\r
+ testScaled(DFT_ROWS);\r
+ testScaledConj(DFT_ROWS);\r
}\r
catch (const Exception& e)\r
{\r
+ ts->printf(CvTS::CONSOLE, e.what());\r
if (!check_and_treat_gpu_exception(e, ts)) throw;\r
return;\r
}\r
return true;\r
}\r
\r
- bool test(int cols, int rows)
+ void test(int flags) // compares the GpuMat mulSpectrums overload with the Mat overload (final bool argument false) for the given flags
 {
+ int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; // random size: cols in [1, 100], rows in [1, 1000]
+
 Mat a, b;
 gen(cols, rows, a);
 gen(cols, rows, b);
 
 Mat c_gold;
- mulSpectrums(a, b, c_gold, 0, false);
+ mulSpectrums(a, b, c_gold, flags, false); // reference result
 
 GpuMat d_c;
- mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, false);
+ mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, false); // result under test
 
- return cmp(c_gold, Mat(d_c)) 
- || (ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d\n", cols, rows), false);
+ if (!cmp(c_gold, Mat(d_c))) // on mismatch only a console message is printed; nothing is returned to the caller
+ ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
 }
\r
- bool testConj(int cols, int rows)
+ void testConj(int flags) // same comparison as test(), but with the final bool argument of mulSpectrums set to true
 {
+ int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; // random size: cols in [1, 100], rows in [1, 1000]
+
 Mat a, b;
 gen(cols, rows, a);
 gen(cols, rows, b);
 
 Mat c_gold;
- mulSpectrums(a, b, c_gold, 0, true);
+ mulSpectrums(a, b, c_gold, flags, true); // reference result
 
 GpuMat d_c;
- mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, true);
+ mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, true); // result under test
 
- return cmp(c_gold, Mat(d_c)) 
- || (ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d\n", cols, rows), false);
+ if (!cmp(c_gold, Mat(d_c))) // on mismatch only a console message is printed; nothing is returned to the caller
+ ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
 }
\r
- bool testScaled(int cols, int rows)
+ void testScaled(int flags) // compares GpuMat mulAndScaleSpectrums with the unscaled Mat mulSpectrums result via cmpScaled
 {
+ int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; // random size: cols in [1, 100], rows in [1, 1000]
+
 Mat a, b;
 gen(cols, rows, a);
 gen(cols, rows, b);
 float scale = 1.f / a.size().area();
 
 Mat c_gold;
- mulSpectrums(a, b, c_gold, 0, false);
+ mulSpectrums(a, b, c_gold, flags, false); // unscaled reference; the scale is passed to cmpScaled below
 
 GpuMat d_c;
- mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, false);
+ mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, false); // result under test
 
- return cmpScaled(c_gold, Mat(d_c), scale) 
- || (ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d\n", cols, rows), false);
+ if (!cmpScaled(c_gold, Mat(d_c), scale)) // on mismatch only a console message is printed; nothing is returned to the caller
+ ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
 }
\r
- bool testScaledConj(int cols, int rows)
+ // Compares GpuMat mulAndScaleSpectrums (final bool argument true) with the
+ // unscaled Mat mulSpectrums result for the given flags, using cmpScaled.
+ // The matrix size is random: cols in [1, 100], rows in [1, 1000].
+ // On mismatch a console message is printed; nothing is returned to the caller.
+ void testScaledConj(int flags)
 {
+ int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
+
 Mat a, b;
 gen(cols, rows, a);
 gen(cols, rows, b);
 float scale = 1.f / a.size().area();
 
 Mat c_gold;
- mulSpectrums(a, b, c_gold, 0, true);
+ mulSpectrums(a, b, c_gold, flags, true);
 
 GpuMat d_c;
- mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, true);
+ mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, true);
+
+ if (!cmpScaled(c_gold, Mat(d_c), scale))
+ ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
+ }
+} CV_GpuMulSpectrumsTest_inst;\r
+\r
+\r
+struct CV_GpuDftTest: CvTest\r
+{\r
+ CV_GpuDftTest(): CvTest("GPU-DftTest", "dft") {}\r
+\r
+ void run(int)\r
+ {\r
+ try\r
+ {\r
+ int cols = 1 + rand() % 100, rows = 1 + rand() % 100;\r
+\r
+ testC2C(cols, rows, 0, "no flags");\r
+ testC2C(cols, rows + 1, 0, "no flags 0 1");\r
+ testC2C(cols, rows + 1, 0, "no flags 1 0");\r
+ testC2C(cols + 1, rows, 0, "no flags 1 1");\r
+ testC2C(cols, rows, DFT_INVERSE, "DFT_INVERSE");\r
+ testC2C(cols, rows, DFT_ROWS, "DFT_ROWS");\r
+ testC2C(1, rows, 0, "single col");\r
+ testC2C(cols, 1, 0, "single row");\r
+ testC2C(1, rows, DFT_INVERSE, "single col inversed");\r
+ testC2C(cols, 1, DFT_INVERSE, "single row inversed");\r
+ testC2C(cols, 1, DFT_ROWS, "single row DFT_ROWS");\r
+ testC2C(1, 2, 0, "size 1 2");\r
+ testC2C(2, 1, 0, "size 2 1");\r
+\r
+ testR2CThenC2R(cols, rows, "sanity");\r
+ testR2CThenC2R(cols, rows + 1, "sanity 0 1");\r
+ testR2CThenC2R(cols + 1, rows, "sanity 1 0");\r
+ testR2CThenC2R(cols + 1, rows + 1, "sanity 1 1");\r
+ testR2CThenC2R(1, rows, "single col");\r
+ testR2CThenC2R(1, rows + 1, "single col 1");\r
+ testR2CThenC2R(cols, 1, "single row" );;\r
+ testR2CThenC2R(cols + 1, 1, "single row 1" );;\r
+ }\r
+ catch (const Exception& e)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, e.what());\r
+ if (!check_and_treat_gpu_exception(e, ts)) throw;\r
+ return;\r
+ }\r
+ }\r
+\r
+ void gen(int cols, int rows, int cn, Mat& mat)\r
+ {\r
+ RNG rng;\r
+ mat.create(rows, cols, CV_MAKETYPE(CV_32F, cn));\r
+ rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));\r
+ }\r
\r
- return cmpScaled(c_gold, Mat(d_c), scale) \r
- || (ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d\n", cols, rows), false);\r
+ bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f)\r
+ {\r
+ if (gold.size() != mine.size())\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad sizes: gold: %d %d, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows);\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return false;\r
+ }\r
+ if (gold.depth() != mine.depth())\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad depth: gold=%d, mine=%d\n", gold.depth(), mine.depth());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return false;\r
+ }\r
+ if (gold.channels() != mine.channels())\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad channel count: gold=%d, mine=%d\n", gold.channels(), mine.channels());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return false;\r
+ }\r
+ for (int i = 0; i < gold.rows; ++i)\r
+ {\r
+ for (int j = 0; j < gold.cols * gold.channels(); ++j)\r
+ {\r
+ float gold_ = gold.at<float>(i, j);\r
+ float mine_ = mine.at<float>(i, j) * scale;\r
+ if (fabs(gold_ - mine_) > max_err)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_);\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ return false;\r
+ }\r
+ }\r
+ }\r
+ return true;\r
+ }\r
+\r
+ void testC2C(int cols, int rows, int flags, const std::string& hint)\r
+ {\r
+ Mat a;\r
+ gen(cols, rows, 2, a);\r
+\r
+ Mat b_gold;\r
+ dft(a, b_gold, flags);\r
+\r
+ GpuMat d_b;\r
+ dft(GpuMat(a), d_b, flags);\r
+\r
+ bool ok = true;\r
+ if (ok && d_b.depth() != CV_32F)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_b.depth());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ ok = false;\r
+ }\r
+ if (ok && d_b.channels() != 2)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_b.channels());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ ok = false;\r
+ }\r
+ if (ok) ok = cmp(b_gold, Mat(d_b), rows * cols * 1e-5f);\r
+ if (!ok) \r
+ ts->printf(CvTS::CONSOLE, "testC2C failed: hint=%s, cols=%d, rows=%d, flags=%d\n", hint.c_str(), cols, rows, flags);\r
+ }\r
+\r
+ void testR2CThenC2R(int cols, int rows, const std::string& hint)\r
+ {\r
+ Mat a;\r
+ gen(cols, rows, 1, a);\r
+\r
+ bool odd = false;\r
+ if (a.cols == 1) odd = a.rows % 2 == 1;\r
+ else odd = a.cols % 2 == 1;\r
+ bool ok = true;\r
+\r
+ GpuMat d_b;\r
+ GpuMat d_c;\r
+ dft(GpuMat(a), d_b, 0);\r
+ dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd);\r
+\r
+ if (ok && d_c.depth() != CV_32F)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_c.depth());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ ok = false;\r
+ }\r
+ if (ok && d_c.channels() != 1)\r
+ {\r
+ ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_c.channels());\r
+ ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+ ok = false;\r
+ }\r
+ if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols));\r
+ if (!ok) \r
+ ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows);\r
}\r
-} CV_GpuMulSpectrumsTest_inst;
\ No newline at end of file
+} CV_GpuDftTest_inst;
\ No newline at end of file