From 557dd39f0312cb7254721898b9e2274e4dba40af Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Wed, 2 Feb 2011 07:23:55 +0000 Subject: [PATCH] fixed gpu::sum* on CC1.0, updated some tests --- modules/gpu/include/opencv2/gpu/gpu.hpp | 10 ++++++++-- modules/gpu/src/cuda/matrix_reductions.cu | 6 +++++- modules/gpu/src/initialization.cpp | 6 +----- tests/gpu/src/arithm.cpp | 10 ++++++++++ tests/gpu/src/meanshift.cpp | 4 ++-- tests/gpu/src/mssegmentation.cpp | 2 +- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index df30a8c..0862ee6 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -66,8 +66,14 @@ namespace cv enum GpuFeature { - NATIVE_DOUBLE, - ATOMICS + COMPUTE_10 = 10, + COMPUTE_11 = 11, + COMPUTE_12 = 12, + COMPUTE_13 = 13, + COMPUTE_20 = 20, + COMPUTE_21 = 21, + ATOMICS = COMPUTE_11, + NATIVE_DOUBLE = COMPUTE_13 }; class CV_EXPORTS TargetArchs diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 398e376..396a9d7 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -1394,7 +1394,7 @@ namespace cv { namespace gpu { namespace mathfunc smem[tid] = res.x; smem[tid + nthreads] = res.y; smem[tid + 2 * nthreads] = res.z; - smem[tid + 3 * nthreads] = res.z; + smem[tid + 3 * nthreads] = res.w; __syncthreads(); sumInSmem(smem, tid); @@ -1432,21 +1432,25 @@ namespace cv { namespace gpu { namespace mathfunc src, (typename TypeVec::vec_t*)buf.ptr(0)); sumPass2Kernel<<<1, threads_x * threads_y>>>( (typename TypeVec::vec_t*)buf.ptr(0), grid.x * grid.y); + break; case 2: sumKernel_C2, threads_x * threads_y><<>>( src, (typename TypeVec::vec_t*)buf.ptr(0)); sumPass2Kernel_C2<<<1, threads_x * threads_y>>>( (typename TypeVec::vec_t*)buf.ptr(0), grid.x * grid.y); + break; case 3: sumKernel_C3, threads_x * threads_y><<>>( src, (typename TypeVec::vec_t*)buf.ptr(0)); sumPass2Kernel_C3<<<1, threads_x * threads_y>>>( (typename TypeVec::vec_t*)buf.ptr(0), grid.x * grid.y); + break; case 4: sumKernel_C4, threads_x * threads_y><<>>( src, (typename TypeVec::vec_t*)buf.ptr(0)); sumPass2Kernel_C4<<<1, threads_x * threads_y>>>( (typename TypeVec::vec_t*)buf.ptr(0), grid.x * grid.y); + break; } cudaSafeCall(cudaThreadSynchronize()); diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index d11b95c..039302c 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -71,11 +71,7 @@ namespace CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature) { - if (feature == NATIVE_DOUBLE) - return ::compareToSet(CUDA_ARCH_FEATURES, 13, std::greater_equal()); - if (feature == ATOMICS) - return ::compareToSet(CUDA_ARCH_FEATURES, 11, std::greater_equal()); - return true; + return ::compareToSet(CUDA_ARCH_FEATURES, feature, std::greater_equal()); } diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp index 28cbc42..b7501a4 100644 --- a/tests/gpu/src/arithm.cpp +++ b/tests/gpu/src/arithm.cpp @@ -947,6 +947,16 @@ struct CV_GpuSumTest: CvTest // sum // + gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 1), src); + a = sum(src); + b = sum(GpuMat(src)); + if (abs(a[0] - b[0]) > src.size().area() * max_err) + { + ts->printf(CvTS::CONSOLE, "1 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return; + } + gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src); a = sum(src); b = sum(GpuMat(src)); diff --git a/tests/gpu/src/meanshift.cpp b/tests/gpu/src/meanshift.cpp index dc39423..d1bd345 100644 --- a/tests/gpu/src/meanshift.cpp +++ b/tests/gpu/src/meanshift.cpp @@ -56,7 +56,7 @@ struct CV_GpuMeanShiftTest : public CvTest cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png"); cv::Mat img_template; - if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2) + if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2) img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png"); else img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png"); @@ -199,7 +199,7 @@ struct CV_GpuMeanShiftProcTest : public CvTest cv::Mat spmap_template; cv::FileStorage fs; - if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && cv::gpu::DeviceInfo().major() >= 2) + if (cv::gpu::TargetArchs::builtWith(cv::gpu::COMPUTE_20) && cv::gpu::DeviceInfo().major() >= 2) fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ); else fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ); diff --git a/tests/gpu/src/mssegmentation.cpp b/tests/gpu/src/mssegmentation.cpp index ea09b01..15a72f3 100644 --- a/tests/gpu/src/mssegmentation.cpp +++ b/tests/gpu/src/mssegmentation.cpp @@ -69,7 +69,7 @@ struct CV_GpuMeanShiftSegmentationTest : public CvTest { { stringstream path; path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize; - if (TargetArchs::hasEqualOrGreater(2, 0) && DeviceInfo().major() >= 2) + if (TargetArchs::builtWith(COMPUTE_20) && DeviceInfo().major() >= 2) path << ".png"; else path << "_CC1X.png"; -- 2.7.4