simplified gpu::columnSum test, it doesn't fail on Quadro anymore (when seed is 00000...
author Alexey Spizhevoy <no@email>
Mon, 27 Dec 2010 09:10:22 +0000 (09:10 +0000)
committer Alexey Spizhevoy <no@email>
Mon, 27 Dec 2010 09:10:22 +0000 (09:10 +0000)
modules/gpu/src/imgproc_gpu.cpp
tests/gpu/src/gputest_main.cpp
tests/gpu/src/imgproc_gpu.cpp

index 6022bec..bf8e7f6 100644 (file)
@@ -1156,25 +1156,25 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
     if (src_data.data != src.data)\r
         src.copyTo(src_data);\r
 \r
-    Size dft_size_ = dft_size;\r
+    Size dft_size_opt = dft_size;\r
     if (is_1d_input && !is_row_dft)\r
     {\r
         // If the source matrix is single column handle it as single row\r
-        dft_size_.width = std::max(dft_size.width, dft_size.height);\r
-        dft_size_.height = std::min(dft_size.width, dft_size.height);\r
+        dft_size_opt.width = std::max(dft_size.width, dft_size.height);\r
+        dft_size_opt.height = std::min(dft_size.width, dft_size.height);\r
     }\r
 \r
     cufftType dft_type = CUFFT_R2C;\r
     if (is_complex_input) \r
         dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
 \r
-    CV_Assert(dft_size_.width > 1);\r
+    CV_Assert(dft_size_opt.width > 1);\r
 \r
     cufftHandle plan;\r
     if (is_1d_input || is_row_dft)\r
-        cufftPlan1d(&plan, dft_size_.width, dft_type, dft_size_.height);\r
+        cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);\r
     else\r
-        cufftPlan2d(&plan, dft_size_.height, dft_size_.width, dft_type);\r
+        cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);\r
 \r
     if (is_complex_input)\r
     {\r
@@ -1194,7 +1194,8 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
     }\r
     else\r
     {\r
-        if (dft_size == dft_size_)\r
+        // dft_size may have been swapped above for efficiency (single column handled as a single row); the output size must reflect that\r
+        if (dft_size == dft_size_opt)\r
             createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);\r
         else\r
             createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);\r
@@ -1206,7 +1207,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
     cufftSafeCall(cufftDestroy(plan));\r
 \r
     if (is_scaled_dft)\r
-        multiply(dst, Scalar::all(1. / (dft_size.area())), dst);\r
+        multiply(dst, Scalar::all(1. / dft_size.area()), dst);\r
 }\r
 \r
 //////////////////////////////////////////////////////////////////////////////\r
@@ -1260,18 +1261,19 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
     block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols);\r
     block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows);\r
 \r
-    GpuMat result_data = createContinuous(dft_size, CV_32F);\r
-\r
     int spect_len = dft_size.height * (dft_size.width / 2 + 1);\r
-    GpuMat image_spect(1, spect_len, CV_32FC2);\r
-    GpuMat templ_spect(1, spect_len, CV_32FC2);\r
-    GpuMat result_spect(1, spect_len, CV_32FC2);\r
+    GpuMat image_spect = createContinuous(1, spect_len, CV_32FC2);\r
+    GpuMat templ_spect = createContinuous(1, spect_len, CV_32FC2);\r
+    GpuMat result_spect = createContinuous(1, spect_len, CV_32FC2);\r
 \r
     cufftHandle planR2C, planC2R;\r
     cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));\r
     cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));\r
 \r
+    GpuMat image_block = createContinuous(dft_size, CV_32F);\r
     GpuMat templ_block = createContinuous(dft_size, CV_32F);\r
+    GpuMat result_data = createContinuous(dft_size, CV_32F);\r
+\r
     GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);\r
     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, \r
                    templ_block.cols - templ_roi.cols, 0);\r
@@ -1279,8 +1281,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
     cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), \r
                                templ_spect.ptr<cufftComplex>()));\r
 \r
-    GpuMat image_block = createContinuous(dft_size, CV_32F);\r
-\r
     // Process all blocks of the result matrix\r
     for (int y = 0; y < result.rows; y += block_size.height)\r
     {\r
index 366d2ee..cbeb0d0 100644 (file)
@@ -47,7 +47,6 @@ const char* blacklist[] =
 {
     "GPU-AsyncGpuMatOperator",     // crash
     "GPU-NppImageCanny",            // NPP_TEXTURE_BIND_ERROR
-    "GPU-BruteForceMatcher", // often crashes when seed=000001af5a11badd
     0
 };
 
index 9557b88..7766d80 100644 (file)
@@ -768,34 +768,37 @@ struct CV_GpuColumnSumTest: CvTest
     {\r
         try\r
         {\r
-            int n = 375;\r
-            int m = 1072;\r
-            Mat src(n, m, CV_32F);\r
+            int cols = 375;\r
+            int rows = 1072;\r
+\r
+            Mat src(rows, cols, CV_32F);\r
             RNG rng(1);\r
             rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1));\r
-            Mat dst_gold, dst2_gold;\r
-\r
-            integral(src, dst_gold, dst2_gold);\r
 \r
-            GpuMat dsrc(src);\r
-            GpuMat buf;\r
-            GpuMat dst;\r
-            columnSum(dsrc, buf);\r
-            transpose(buf, dst);\r
-            columnSum(dst, buf);\r
-            transpose(buf, dst);\r
+            GpuMat d_dst;\r
+            columnSum(GpuMat(src), d_dst);\r
             \r
-            Mat dst_ = dst;\r
-            for (int i = 0; i < dst_.rows; ++i)\r
+            Mat dst = d_dst;\r
+            for (int j = 0; j < src.cols; ++j)\r
+            {\r
+                float a = src.at<float>(0, j);\r
+                float b = dst.at<float>(0, j);\r
+                if (fabs(a - b) > 0.5f)\r
+                {\r
+                    ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", 0, j, a, b);\r
+                    ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
+                    return;\r
+                }\r
+            }\r
+            for (int i = 1; i < src.rows; ++i)\r
             {\r
-                const double* dst_gold_data = (const double*)dst_gold.ptr(i + 1);\r
-                for (int j = 0; j < dst_.cols; ++j)\r
+                for (int j = 0; j < src.cols; ++j)\r
                 {\r
-                    float a = (float)dst_gold_data[j + 1];\r
-                    float b = dst_.at<float>(i, j);\r
+                    float a = src.at<float>(i, j) += src.at<float>(i - 1, j);\r
+                    float b = dst.at<float>(i, j);\r
                     if (fabs(a - b) > 0.5f)\r
                     {\r
-                        ts->printf(CvTS::CONSOLE, "%d %d %f %f\n", i, j, a, b);\r
+                        ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", i, j, a, b);\r
                         ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
                         return;\r
                     }\r
@@ -804,6 +807,7 @@ struct CV_GpuColumnSumTest: CvTest
         }\r
         catch (const Exception& e)\r
         {\r
+            ts->printf(CvTS::CONSOLE, e.what());\r
             if (!check_and_treat_gpu_exception(e, ts)) throw;\r
             return;\r
         }\r