// There is no need to print subtest statistics\r
return;\r
\r
- int cpu_time = static_cast<int>(cpu_elapsed_ / getTickFrequency() * 1000.0);\r
- int gpu_time = static_cast<int>(gpu_elapsed_ / getTickFrequency() * 1000.0);\r
+ //int cpu_time = static_cast<int>(cpu_elapsed_ / getTickFrequency() * 1000.0);\r
+ //int gpu_time = static_cast<int>(gpu_elapsed_ / getTickFrequency() * 1000.0);\r
+\r
+ double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;\r
+ double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;\r
\r
double speedup = static_cast<double>(cpu_elapsed_) /\r
std::max((int64)1, gpu_elapsed_);\r
"{ f | filter | | filter for test }"\r
"{ w | workdir | | set working directory }"\r
"{ l | list | false | show all tests }"\r
- "{ d | device | 0 | device id }";\r
+ "{ d | device | 0 | device id }"\r
+ "{ i | iters | 10 | iteration count }";\r
\r
CommandLineParser cmd(argc, argv, keys);\r
\r
string filter = cmd.get<string>("filter");\r
string workdir = cmd.get<string>("workdir");\r
bool list = cmd.get<bool>("list");\r
+ int iters = cmd.get<int>("iters");\r
\r
if (!filter.empty())\r
TestSystem::instance().setTestFilter(filter);\r
if (list)\r
TestSystem::instance().setListMode(true);\r
\r
+ TestSystem::instance().setIters(iters);\r
+\r
TestSystem::instance().run();\r
\r
return 0;\r
#include <iostream>\r
#include <cstdio>\r
#include <vector>\r
+#include <numeric>\r
#include <string>\r
#include "opencv2/core/core.hpp"\r
#include "opencv2/gpu/gpu.hpp"\r
void setTestFilter(const std::string& val) { test_filter_ = val; }\r
const std::string& testFilter() const { return test_filter_; }\r
\r
+    // Sets how many times each timed CPU/GPU section is repeated.
+    // Non-positive values are clamped to 1: with iters_ <= 0, stop() is
+    // already true on the first check, so the timed code would never run
+    // and the mean computed over iters_ would divide by zero.
+    void setIters(int iters) { iters_ = iters > 0 ? iters : 1; }
+\r
void addInit(Runnable* init) { inits_.push_back(init); }\r
void addTest(Runnable* test) { tests_.push_back(test); }\r
void run();\r
return cur_subtest_description_;\r
}\r
\r
+ bool stop() const { return it_ >= iters_; }\r
+\r
void cpuOn() { cpu_started_ = cv::getTickCount(); }\r
void cpuOff() \r
{\r
int64 delta = cv::getTickCount() - cpu_started_;\r
- cpu_elapsed_ += delta;\r
+ cpu_times_.push_back(delta);\r
+ ++it_;\r
+ }\r
+    // Folds the samples recorded by cpuOff() into the subtest's CPU time.
+    // The mean is taken over the samples actually collected rather than
+    // over iters_, so an empty run yields 0 instead of NaN. The sample
+    // buffer is emptied afterwards so that a later CPU_ON/CPU_OFF section
+    // in the same subtest does not sum these samples a second time.
+    void cpuComplete()
+    {
+        double delta_mean = cpu_times_.empty() ? 0.0 :
+            std::accumulate(cpu_times_.begin(), cpu_times_.end(), 0.0) / cpu_times_.size();
+        cpu_times_.clear();
+        cpu_elapsed_ += delta_mean;
         cur_subtest_is_empty_ = false;
-    } 
+        // Rearm the iteration counter for the next timed section.
+        it_ = 0;
+    }
\r
void gpuOn() { gpu_started_ = cv::getTickCount(); }\r
void gpuOff() \r
{\r
int64 delta = cv::getTickCount() - gpu_started_;\r
- gpu_elapsed_ += delta;\r
+ gpu_times_.push_back(delta);\r
+ ++it_;\r
+ }\r
+    // Folds the samples recorded by gpuOff() into the subtest's GPU time.
+    // The mean is taken over the samples actually collected rather than
+    // over iters_, so an empty run yields 0 instead of NaN. The sample
+    // buffer is emptied afterwards so that a later GPU_ON/GPU_OFF section
+    // in the same subtest does not sum these samples a second time.
+    void gpuComplete()
+    {
+        double delta_mean = gpu_times_.empty() ? 0.0 :
+            std::accumulate(gpu_times_.begin(), gpu_times_.end(), 0.0) / gpu_times_.size();
+        gpu_times_.clear();
+        gpu_elapsed_ += delta_mean;
         cur_subtest_is_empty_ = false;
+        // Rearm the iteration counter for the next timed section.
+        it_ = 0;
     }
\r
bool isListMode() const { return is_list_mode_; }\r
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),\r
gpu_elapsed_(0), speedup_total_(0.0),\r
num_subtests_called_(0),\r
- is_list_mode_(false) {}\r
+ is_list_mode_(false) \r
+ {\r
+ iters_ = 10;\r
+ it_ = 0;\r
+ cpu_times_.reserve(iters_);\r
+ gpu_times_.reserve(iters_);\r
+ }\r
\r
void finishCurrentSubtest();\r
void resetCurrentSubtest() \r
gpu_elapsed_ = 0;\r
cur_subtest_description_.str("");\r
cur_subtest_is_empty_ = true;\r
+ it_ = 0;\r
+ cpu_times_.clear();\r
+ gpu_times_.clear();\r
}\r
\r
void printHeading();\r
int num_subtests_called_;\r
\r
bool is_list_mode_;\r
+\r
+ int iters_;\r
+ int it_;\r
+ std::vector<int64> cpu_times_;\r
+ std::vector<int64> gpu_times_;\r
};\r
\r
\r
void name##_test::run()\r
\r
#define SUBTEST TestSystem::instance().startNewSubtest()\r
-#define CPU_ON TestSystem::instance().cpuOn()\r
-#define GPU_ON TestSystem::instance().gpuOn()\r
-#define CPU_OFF TestSystem::instance().cpuOff()\r
-#define GPU_OFF TestSystem::instance().gpuOff()\r
+\r
+#define CPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().cpuOn()\r
+#define CPU_OFF TestSystem::instance().cpuOff(); } TestSystem::instance().cpuComplete()\r
+\r
+#define GPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().gpuOn()\r
+#define GPU_OFF TestSystem::instance().gpuOff(); } TestSystem::instance().gpuComplete()\r
\r
// Generates matrix\r
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low, \r
gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);\r
GPU_OFF;\r
}\r
+\r
+ for (int size = 2000; size <= 4000; size += 1000)\r
+ {\r
+ SUBTEST << size << 'x' << size << ", 32FC1, THRESH_TRUNC [NPP]";\r
+\r
+ gen(src, size, size, CV_32FC1, 0, 100);\r
+\r
+ threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);\r
+\r
+ CPU_ON; \r
+ threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);\r
+ CPU_OFF;\r
+\r
+ d_src.upload(src);\r
+\r
+ gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);\r
+\r
+ GPU_ON;\r
+ gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);\r
+ GPU_OFF;\r
+ }\r
}\r
\r
TEST(pow)\r