SANITY_CHECK(dst_host);\r
}\r
\r
-PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()),\r
- testing::Values(GPU_TYPICAL_MAT_SIZES)))\r
+PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),\r
+ testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),\r
+ testing::Values(27, 32, 64)))\r
{\r
DeviceInfo devInfo = std::tr1::get<0>(GetParam());\r
- Size size = std::tr1::get<1>(GetParam());\r
+ int image_size = std::tr1::get<1>(GetParam());\r
+ int templ_size = std::tr1::get<2>(GetParam());\r
\r
setDevice(devInfo.deviceID());\r
\r
- Mat image_host(size, CV_32FC1);\r
- Mat templ_host(size, CV_32FC1);\r
+ Mat image_host(image_size, image_size, CV_32FC1);\r
+ Mat templ_host(templ_size, templ_size, CV_32FC1);\r
\r
declare.in(image_host, templ_host, WARMUP_RNG);\r
\r
\r
// Aliases for TestBaseWithParam instantiations used as perf-test fixtures.\r
// Each alias name lists, in order, the element types of the parameter tuple\r
// (DevInfo takes a bare DeviceInfo rather than a tuple).\r
typedef TestBaseWithParam<DeviceInfo> DevInfo;\r
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;\r
// (DeviceInfo, int, int): the convolve perf test reads the two ints as the\r
// side length of a square image and the side length of a square template.\r
+typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;\r
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;\r
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;\r
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;\r
// Estimates the block size used to tile a convolution result.\r
//\r
// result_size - size of the convolution result to be covered by blocks.\r
// templ_size  - size of the template; each block side is at least\r
//               `scale` (40) times the matching template side.\r
//\r
// The initial estimate is max(templ side * 40, minimum side), where the\r
// minimum side is 512, or 1024 when DeviceInfo().majorVersion() >= 2.\r
// The result is then split into the number of blocks that estimate implies,\r
// and the block is shrunk so those blocks are (nearly) equal in size instead\r
// of leaving one small remainder block. Finally the block is clamped to\r
// result_size.\r
//\r
// The block counts are clamped to at least 1: for a result side <= 0 the\r
// ceil-division yields 0, and dividing by it would be an integer division\r
// by zero. With the clamp such sides simply come back unchanged, exactly as\r
// the final std::min clamp alone would return them.\r
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
{\r
int scale = 40;\r
- Size bsize_min(1024, 1024);\r
+ Size bsize_min(512, 512);\r
\r
// Check whether we use Fermi generation or newer GPU\r
if (DeviceInfo().majorVersion() >= 2)\r
{\r
- bsize_min.width = 2048;\r
- bsize_min.height = 2048;\r
+ bsize_min.width = 1024;\r
+ bsize_min.height = 1024;\r
}\r
\r
Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
std::max(templ_size.height * scale, bsize_min.height));\r
\r
+ // Number of blocks needed per row/column (ceil division), never less than 1\r
+ // so the even-split divisions below cannot divide by zero.\r
+ int blocks_per_row = std::max(1, (result_size.width + bsize.width - 1) / bsize.width);\r
+ int blocks_per_col = std::max(1, (result_size.height + bsize.height - 1) / bsize.height);\r
+ // Shrink the block so the result is split into equally sized blocks.\r
+ bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;\r
+ bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;\r
+\r
bsize.width = std::min(bsize.width, result_size.width);\r
bsize.height = std::min(bsize.height, result_size.height);\r
return bsize;\r
\r
void TestSystem::run()\r
{\r
- // Run test initializers\r
- vector<Runnable*>::iterator it = inits_.begin();\r
- for (; it != inits_.end(); ++it)\r
+ if (is_list_mode_)\r
+ {\r
+ for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)\r
+ cout << (*it)->name() << endl;\r
+ return;\r
+ }\r
+\r
+ // Run test initializers \r
+ for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)\r
{\r
if ((*it)->name().find(test_filter_, 0) != string::npos)\r
(*it)->run();\r
printHeading();\r
\r
// Run tests\r
- it = tests_.begin();\r
- for (; it != tests_.end(); ++it)\r
+ for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)\r
{\r
try\r
{\r
string key = argv[i];\r
if (key == "--help")\r
{\r
- cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n";\r
+ cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";\r
return 0;\r
}\r
if (key == "--filter" && i + 1 < argc)\r
TestSystem::instance().setTestFilter(argv[++i]);\r
- else if (key == "--working-dir" && i + 1 < argc)\r
+ else if (key == "--workdir" && i + 1 < argc)\r
TestSystem::instance().setWorkingDir(argv[++i]);\r
+ else if (key == "--ls")\r
+ TestSystem::instance().setListMode(true);\r
else \r
{\r
cout << "Unknown parameter: '" << key << "'" << endl;\r
cur_subtest_is_empty_ = false;\r
}\r
\r
// Returns the stored list-mode flag (is_list_mode_).\r
+ bool isListMode() const { return is_list_mode_; }\r
// Stores the list-mode flag. The "--ls" command-line option sets it to true;\r
// when it is set, run() prints the name of every registered test and returns\r
// without running anything.\r
+ void setListMode(bool value) { is_list_mode_ = value; }\r
+\r
private:\r
// Private constructor (callers obtain the object through instance()).\r
// Sets cur_subtest_is_empty_ to true, zeroes the CPU/GPU elapsed values, the\r
// total speedup and the subtest-call counter, and starts with list mode off.\r
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),\r
gpu_elapsed_(0), speedup_total_(0.0),\r
- num_subtests_called_(0) {}\r
+ num_subtests_called_(0),\r
+ is_list_mode_(false) {}\r
\r
void finishCurrentSubtest();\r
void resetCurrentSubtest() \r
\r
double speedup_total_;\r
int num_subtests_called_;\r
+\r
+ bool is_list_mode_;\r
};\r
\r
\r