Updated optimal block size estimation for the convolve() function
author Alexey Spizhevoy <no@email>
Mon, 3 Oct 2011 14:05:52 +0000 (14:05 +0000)
committer Alexey Spizhevoy <no@email>
Mon, 3 Oct 2011 14:05:52 +0000 (14:05 +0000)
modules/gpu/perf/perf_imgproc.cpp
modules/gpu/perf/perf_utility.hpp
modules/gpu/src/imgproc.cpp
samples/gpu/performance/performance.cpp
samples/gpu/performance/performance.h

index f239edb..81dd559 100644 (file)
@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
     SANITY_CHECK(dst_host);\r
 }\r
 \r
-PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()),\r
-                                                testing::Values(GPU_TYPICAL_MAT_SIZES)))\r
+PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),\r
+                                                     testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),\r
+                                                     testing::Values(27, 32, 64)))\r
 {\r
     DeviceInfo devInfo = std::tr1::get<0>(GetParam());\r
-    Size size = std::tr1::get<1>(GetParam());\r
+    int image_size = std::tr1::get<1>(GetParam());\r
+    int templ_size = std::tr1::get<2>(GetParam());\r
 \r
     setDevice(devInfo.deviceID());\r
 \r
-    Mat image_host(size, CV_32FC1);\r
-    Mat templ_host(size, CV_32FC1);\r
+    Mat image_host(image_size, image_size, CV_32FC1);\r
+    Mat templ_host(templ_size, templ_size, CV_32FC1);\r
 \r
     declare.in(image_host, templ_host, WARMUP_RNG);\r
 \r
index a57e367..17f9418 100644 (file)
@@ -32,6 +32,7 @@ struct CvtColorInfo
 \r
 typedef TestBaseWithParam<DeviceInfo> DevInfo;\r
 typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;\r
+typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;\r
 typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;\r
 typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;\r
 typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;\r
index 8b86ce6..47b0998 100644 (file)
@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
 Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)\r
 {\r
     int scale = 40;\r
-    Size bsize_min(1024, 1024);\r
+    Size bsize_min(512, 512);\r
 \r
     // Check whether we use Fermi generation or newer GPU\r
     if (DeviceInfo().majorVersion() >= 2)\r
     {\r
-        bsize_min.width = 2048;\r
-        bsize_min.height = 2048;\r
+        bsize_min.width = 1024;\r
+        bsize_min.height = 1024;\r
     }\r
 \r
     Size bsize(std::max(templ_size.width * scale, bsize_min.width),\r
                std::max(templ_size.height * scale, bsize_min.height));\r
 \r
+    int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;\r
+    int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;\r
+    bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;\r
+    bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;\r
+\r
     bsize.width = std::min(bsize.width, result_size.width);\r
     bsize.height = std::min(bsize.height, result_size.height);\r
     return bsize;\r
index b9bbc85..6b1619e 100644 (file)
@@ -8,9 +8,15 @@ using namespace cv;
 \r
 void TestSystem::run()\r
 {\r
-    // Run test initializers\r
-    vector<Runnable*>::iterator it = inits_.begin();\r
-    for (; it != inits_.end(); ++it)\r
+    if (is_list_mode_)\r
+    {\r
+        for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)\r
+            cout << (*it)->name() << endl;\r
+        return;\r
+    }\r
+\r
+    // Run test initializers    \r
+    for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)\r
     {\r
         if ((*it)->name().find(test_filter_, 0) != string::npos)\r
             (*it)->run();\r
@@ -19,8 +25,7 @@ void TestSystem::run()
     printHeading();\r
 \r
     // Run tests\r
-    it = tests_.begin();\r
-    for (; it != tests_.end(); ++it)\r
+    for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)\r
     {\r
         try\r
         {\r
@@ -145,13 +150,15 @@ int main(int argc, char** argv)
         string key = argv[i];\r
         if (key == "--help")\r
         {\r
-            cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n";\r
+            cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";\r
             return 0;\r
         }\r
         if (key == "--filter" && i + 1 < argc)\r
             TestSystem::instance().setTestFilter(argv[++i]);\r
-        else if (key == "--working-dir" && i + 1 < argc)\r
+        else if (key == "--workdir" && i + 1 < argc)\r
             TestSystem::instance().setWorkingDir(argv[++i]);\r
+        else if (key == "--ls")\r
+            TestSystem::instance().setListMode(true);\r
         else \r
         {\r
             cout << "Unknown parameter: '" << key << "'" << endl;\r
index 0031950..007309b 100644 (file)
@@ -68,10 +68,14 @@ public:
         cur_subtest_is_empty_ = false;\r
     }\r
 \r
+    bool isListMode() const { return is_list_mode_; }\r
+    void setListMode(bool value) { is_list_mode_ = value; }\r
+\r
 private:\r
     TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),\r
                   gpu_elapsed_(0), speedup_total_(0.0),\r
-                  num_subtests_called_(0) {}\r
+                  num_subtests_called_(0),\r
+                  is_list_mode_(false) {}\r
 \r
     void finishCurrentSubtest();\r
     void resetCurrentSubtest() \r
@@ -100,6 +104,8 @@ private:
 \r
     double speedup_total_;\r
     int num_subtests_called_;\r
+\r
+    bool is_list_mode_;\r
 };\r
 \r
 \r