From 79ba160c1cfc545202e55daaac9b487d1b512696 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Wed, 26 Jan 2011 07:49:56 +0000 Subject: [PATCH] added more GPU perf. tests, refactored --- samples/gpu/performance/performance.cpp | 38 +++++++------- samples/gpu/performance/performance.h | 43 ++++++++-------- samples/gpu/performance/tests.cpp | 89 ++++++++++++++++++++++----------- 3 files changed, 99 insertions(+), 71 deletions(-) diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp index 68d5811..c112072 100644 --- a/samples/gpu/performance/performance.cpp +++ b/samples/gpu/performance/performance.cpp @@ -9,11 +9,14 @@ void TestSystem::run() // Run initializers vector::iterator it = inits_.begin(); for (; it != inits_.end(); ++it) + { (*it)->run(); + } cout << setiosflags(ios_base::left); cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(10) << "SPEEDUP" << "DESCRIPTION\n"; + << setw(10) << "SPEEDUP" + << "DESCRIPTION\n"; cout << resetiosflags(ios_base::left); // Run tests @@ -24,30 +27,23 @@ void TestSystem::run() try { (*it)->run(); - flush_subtest_data(); + flushSubtestData(); } - catch (const cv::Exception& e) + catch (const cv::Exception&) { - cout << TAB << "error"; - switch (e.code) - { - case CV_StsNoMem: cout << ": out of memory"; break; - } - if (!description_.str().empty()) - cout << " [" << description_.str() << "]"; - cout << endl; - reset_subtest_data(); + resetSubtestData(); } } - cout << setiosflags(ios_base::fixed | ios_base::left); - cout << "\naverage GPU speedup: x" << setprecision(3) - << speedup_total_ / num_subtests_called_ << endl; - cout << resetiosflags(ios_base::fixed | ios_base::left); + cout << setiosflags(ios_base::fixed); + cout << "\naverage GPU speedup: x" + << setprecision(3) << speedup_total_ / num_subtests_called_ + << endl; + cout << resetiosflags(ios_base::fixed); } -void TestSystem::flush_subtest_data() +void TestSystem::flushSubtestData() { if (!can_flush_) return; @@ -58,9 +54,10 @@ void TestSystem::flush_subtest_data() double speedup = static_cast(cpu_time) / std::max(1, gpu_time); speedup_total_ += speedup; - cout << TAB << setiosflags(ios_base::fixed | ios_base::left); + cout << TAB << setiosflags(ios_base::left); stringstream stream; + stream << cpu_time; cout << setw(10) << stream.str(); @@ -73,11 +70,10 @@ void TestSystem::flush_subtest_data() cout << setw(10) << stream.str(); cout << description_.str(); - - cout << resetiosflags(ios_base::fixed | ios_base::left) << endl; + cout << resetiosflags(ios_base::left) << endl; num_subtests_called_++; - reset_subtest_data(); + resetSubtestData(); } diff --git a/samples/gpu/performance/performance.h b/samples/gpu/performance/performance.h index 7b833b0..9ed575b 100644 --- a/samples/gpu/performance/performance.h +++ b/samples/gpu/performance/performance.h @@ -38,6 +38,13 @@ public: void run(); + // Ends current subtest and starts new one + std::stringstream& subtest() + { + flushSubtestData(); + return description_; + } + void cpuOn() { cpu_started_ = cv::getTickCount(); } void cpuOff() @@ -56,20 +63,13 @@ public: can_flush_ = true; } - // Ends current subtest and starts new one - std::stringstream& subtest() - { - flush_subtest_data(); - return description_; - } - private: TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0), speedup_total_(0.0), num_subtests_called_(0) {}; - void flush_subtest_data(); + void flushSubtestData(); - void reset_subtest_data() + void resetSubtestData() { cpu_elapsed_ = 0; gpu_elapsed_ = 0; @@ -93,17 +93,6 @@ private: }; -#define TEST(name) \ - struct name##_test: Runnable \ - { \ - name##_test(): Runnable(#name) { \ - TestSystem::instance()->addTest(this); \ - } \ - void run(); \ - } name##_test_instance; \ - void name##_test::run() - - #define INIT(name) \ struct name##_init: Runnable \ { \ @@ -115,12 +104,22 @@ private: void name##_init::run() +#define TEST(name) \ + struct name##_test: Runnable \ + { \ + name##_test(): Runnable(#name) { \ + TestSystem::instance()->addTest(this); \ + } \ + void run(); \ + } name##_test_instance; \ + void name##_test::run() + +#define SUBTEST TestSystem::instance()->subtest() +#define DESCRIPTION TestSystem::instance()->subtest() #define CPU_ON TestSystem::instance()->cpuOn() #define GPU_ON TestSystem::instance()->gpuOn() #define CPU_OFF TestSystem::instance()->cpuOff() #define GPU_OFF TestSystem::instance()->gpuOff() -#define SUBTEST TestSystem::instance()->subtest() -#define DESCRIPTION TestSystem::instance()->subtest() void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low, cv::Scalar high); diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index f0729b9..7ef8ad4 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -6,42 +6,40 @@ using namespace std; using namespace cv; -// This code calls CUFFT DFT and initializes that lib -INIT(CUFFT_library) +INIT(matchTemplate) { - Mat src, templ; - gen(src, 500, 500, CV_32F, 0, 1); - gen(templ, 500, 500, CV_32F, 0, 1); + Mat src; gen(src, 500, 500, CV_32F, 0, 1); + Mat templ; gen(templ, 500, 500, CV_32F, 0, 1); - gpu::GpuMat d_src(src); - gpu::GpuMat d_templ(templ); - gpu::GpuMat d_result; + gpu::GpuMat d_src(src), d_templ(templ), d_dst; - gpu::matchTemplate(d_src, d_templ, d_result, CV_TM_CCORR); + gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); } TEST(matchTemplate) { - Mat src, templ, result; + Mat src, templ, dst; gen(src, 3000, 3000, CV_32F, 0, 1); - gpu::GpuMat d_image(src), d_templ, d_result; + gpu::GpuMat d_src(src), d_templ, d_dst; - for (int templ_size = 5; templ_size <= 1000; templ_size *= 2) + for (int templ_size = 5; templ_size < 200; templ_size *= 5) { SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR"; gen(templ, templ_size, templ_size, CV_32F, 0, 1); + dst.create(src.rows - templ.rows + 1, src.cols - templ.cols + 1, CV_32F); CPU_ON; - matchTemplate(src, templ, result, CV_TM_CCORR); + matchTemplate(src, templ, dst, CV_TM_CCORR); CPU_OFF; d_templ = templ; + d_dst.create(d_src.rows - d_templ.rows + 1, d_src.cols - d_templ.cols + 1, CV_32F); GPU_ON; - gpu::matchTemplate(d_image, d_templ, d_result, CV_TM_CCORR); + gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); GPU_OFF; } } @@ -86,6 +84,7 @@ TEST(remap) gen(src, size, size, CV_8UC1, 0, 256); gen(xmap, size, size, CV_32F, 0, size); gen(ymap, size, size, CV_32F, 0, size); + dst.create(xmap.size(), src.type()); CPU_ON; remap(src, dst, xmap, ymap, INTER_LINEAR); @@ -94,6 +93,7 @@ TEST(remap) d_src = src; d_xmap = xmap; d_ymap = ymap; + d_dst.create(d_xmap.size(), d_src.type()); GPU_ON; gpu::remap(d_src, d_dst, d_xmap, d_ymap); @@ -107,17 +107,19 @@ TEST(dft) Mat src, dst; gpu::GpuMat d_src, d_dst; - for (int size = 1000; size <= 8000; size *= 2) + for (int size = 1000; size <= 4000; size *= 2) { SUBTEST << "size " << size << ", 32FC2, complex-to-complex"; gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1)); + dst.create(src.size(), src.type()); CPU_ON; dft(src, dst); CPU_OFF; d_src = src; + d_dst.create(d_src.size(), d_src.type()); GPU_ON; gpu::dft(d_src, d_dst, Size(size, size)); @@ -136,12 +138,14 @@ TEST(cornerHarris) SUBTEST << "size " << size << ", 32FC1"; gen(src, size, size, CV_32F, 0, 1); + dst.create(src.size(), src.type()); CPU_ON; cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101); CPU_OFF; d_src = src; + d_dst.create(src.size(), src.type()); GPU_ON; gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1); @@ -150,22 +154,51 @@ TEST(cornerHarris) } -TEST(memoryAllocation) +TEST(integral) { - Mat mat; - gpu::GpuMat d_mat; + Mat src, sum; + gpu::GpuMat d_src, d_sum; + + for (int size = 1000; size <= 8000; size *= 2) + { + SUBTEST << "size " << size << ", 8U"; - int begin = 100, end = 8000, step = 100; + gen(src, size, size, CV_8U, 0, 256); + sum.create(size + 1, size + 1, CV_32S); - DESCRIPTION << "32F matrices from " << begin << " to " << end; + CPU_ON; + integral(src, sum); + CPU_OFF; - CPU_ON; - for (int size = begin; size <= end; size += step) - mat.create(size, size, CV_32FC1); - CPU_OFF; + d_src = src; + d_sum.create(size + 1, size + 1, CV_32S); - GPU_ON; - for (int size = begin; size <= end; size += step) - d_mat.create(size, size, CV_32FC1); - GPU_OFF; + GPU_ON; + gpu::integral(d_src, d_sum); + GPU_OFF; + } +} + + +TEST(norm) +{ + Mat src; + gpu::GpuMat d_src; + + for (int size = 1000; size <= 8000; size *= 2) + { + SUBTEST << "size " << size << ", 8U"; + + gen(src, size, size, CV_8U, 0, 256); + + CPU_ON; + norm(src); + CPU_OFF; + + d_src = src; + + GPU_ON; + gpu::norm(d_src); + GPU_OFF; + } } -- 2.7.4