From 891e2ff31038faf9184904a6820dbd2975a56bc5 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Thu, 27 Jan 2011 10:06:38 +0000 Subject: [PATCH] replaced has* methods in the GPU module with the TargetArchs monostate --- doc/gpu_initialization.tex | 80 ++++++++------------------------- modules/gpu/include/opencv2/gpu/gpu.hpp | 31 ++++++++----- modules/gpu/src/initialization.cpp | 48 ++++++++++++-------- modules/gpu/src/matrix_reductions.cpp | 20 ++++----- modules/gpu/src/split_merge.cpp | 6 +-- tests/gpu/src/arithm.cpp | 4 +- tests/gpu/src/bitwise_oper.cpp | 2 +- tests/gpu/src/match_template.cpp | 4 +- tests/gpu/src/meanshift.cpp | 4 +- tests/gpu/src/mssegmentation.cpp | 2 +- tests/gpu/src/split_merge.cpp | 10 ++--- 11 files changed, 95 insertions(+), 116 deletions(-) diff --git a/doc/gpu_initialization.tex b/doc/gpu_initialization.tex index 15645c1..ea042c9 100644 --- a/doc/gpu_initialization.tex +++ b/doc/gpu_initialization.tex @@ -68,77 +68,35 @@ Returns true, if the specified GPU has atomics support, otherwise false. \cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.} \end{description} +\cvclass{gpu::TargetArchs} +This class provides functionality (as a set of static methods) for checking which NVIDIA card architectures the GPU module was built for. -\cvCppFunc{gpu::hasPtxVersion} -Returns true, if the GPU module has PTX code for the given architecture, otherwise false. +\bigskip -\cvdefCpp{bool hasPtxVersion(int major, int minor);} +The following method checks whether the module was built with support for the given feature: +\cvdefCpp{static bool builtWith(GpuFeature feature);} \begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} -\end{description} - - -\cvCppFunc{gpu::hasLessOrEqualPtxVersion} -Returns true, if the GPU module has PTX code for the given architecture or older one, otherwise false. 
- -\cvdefCpp{bool hasLessOrEqualPtxVersion(int major, int minor);} -\begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} -\end{description} - - -\cvCppFunc{gpu::hasGreaterOrEqualPtxVersion} -Returns true, if the GPU module has PTX code for the given architecture or newer one, otherwise false. - -\cvdefCpp{bool hasGreaterOrEqualPtxVersion(int major, int minor);} -\begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} -\end{description} - - -\cvCppFunc{gpu::hasCubinVersion} -Returns true, if the GPU module has CUBIN code for the given architecture, otherwise false. - -\cvdefCpp{bool hasCubinVersion(int major, int minor);} -\begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} -\end{description} - - -\cvCppFunc{gpu::hasGreaterOrEqualCubinVersion} -Returns true, if the GPU module has CUBIN code for the given architecture or newer one, otherwise false. - -\cvdefCpp{bool hasGreaterOrEqualCubinVersion(int major, int minor);} -\begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} +\cvarg{feature}{Feature to be checked. Available alternatives: +\begin{itemize} +\item NATIVE\_DOUBLE Native double operations support +\item ATOMICS Atomic operations support +\end{itemize}} \end{description} - -\cvCppFunc{gpu::hasVersion} -Returns true, if the GPU module has PTX or CUBIN code for the given architecture, otherwise false. - -\cvdefCpp{bool hasVersion(int major, int minor);} -\begin{description} -\cvarg{major}{Major compute capability version.} -\cvarg{minor}{Minor compute capability version.} -\end{description} - - -\cvCppFunc{gpu::hasGreaterOrEqualVersion} -Returns true, if the GPU module has PTX or CUBIN code for the given architecture or newer one, otherwise false. 
- -\cvdefCpp{bool hasGreaterOrEqualVersion(int major, int minor);} +There is a set of methods for checking whether the module contains intermediate (PTX) or binary GPU code for the given architecture: +\cvdefCpp{ +static bool has(int major, int minor);\newline +static bool hasPtx(int major, int minor);\newline +static bool hasBin(int major, int minor);\newline +static bool hasEqualOrLessPtx(int major, int minor);\newline +static bool hasEqualOrGreater(int major, int minor);\newline +static bool hasEqualOrGreaterPtx(int major, int minor);\newline +static bool hasEqualOrGreaterBin(int major, int minor);} \begin{description} \cvarg{major}{Major compute capability version.} \cvarg{minor}{Minor compute capability version.} \end{description} - \cvCppFunc{gpu::isCompatibleWith} Returns true, if the GPU module is built with PTX or CUBIN compatible with the given GPU device, otherwise false. diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index f490f55..ea07a74 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -64,6 +64,27 @@ namespace cv CV_EXPORTS void setDevice(int device); CV_EXPORTS int getDevice(); + enum GpuFeature + { + NATIVE_DOUBLE, + ATOMICS + }; + + class CV_EXPORTS TargetArchs + { + public: + static bool builtWith(GpuFeature feature); + static bool has(int major, int minor); + static bool hasPtx(int major, int minor); + static bool hasBin(int major, int minor); + static bool hasEqualOrLessPtx(int major, int minor); + static bool hasEqualOrGreater(int major, int minor); + static bool hasEqualOrGreaterPtx(int major, int minor); + static bool hasEqualOrGreaterBin(int major, int minor); + private: + TargetArchs(); + }; + CV_EXPORTS void getComputeCapability(int device, int& major, int& minor); CV_EXPORTS int getNumberOfSMs(int device); @@ -72,16 +93,6 @@ namespace cv CV_EXPORTS bool hasNativeDoubleSupport(int device); CV_EXPORTS bool hasAtomicsSupport(int device); - 
CV_EXPORTS bool hasPtxVersion(int major, int minor); - CV_EXPORTS bool hasLessOrEqualPtxVersion(int major, int minor); - CV_EXPORTS bool hasGreaterOrEqualPtxVersion(int major, int minor); - - CV_EXPORTS bool hasCubinVersion(int major, int minor); - CV_EXPORTS bool hasGreaterOrEqualCubinVersion(int major, int minor); - - CV_EXPORTS bool hasVersion(int major, int minor); - CV_EXPORTS bool hasGreaterOrEqualVersion(int major, int minor); - CV_EXPORTS bool isCompatibleWith(int device); //////////////////////////////// Error handling //////////////////////// diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 17bd150..a1b7fcd 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -162,49 +162,59 @@ namespace } -CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature) { - return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to()); + if (feature == NATIVE_DOUBLE) + return hasEqualOrGreater(1, 3); + if (feature == ATOMICS) + return hasEqualOrGreater(1, 1); + return true; } -CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::has(int major, int minor) { - return ::compare(CUDA_ARCH_PTX, major * 10 + minor, - std::less_equal()); + return hasPtx(major, minor) || hasBin(major, minor); } -CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::hasPtx(int major, int minor) { - return ::compare(CUDA_ARCH_PTX, major * 10 + minor, - std::greater_equal()); + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to()); } -CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to()); } -CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int 
minor) +CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { - return ::compare(CUDA_ARCH_BIN, major * 10 + minor, - std::greater_equal()); + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, + std::less_equal()); } -CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { - return hasPtxVersion(major, minor) || hasCubinVersion(major, minor); + return hasEqualOrGreaterPtx(major, minor) || + hasEqualOrGreaterBin(major, minor); } -CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) +CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { - return hasGreaterOrEqualPtxVersion(major, minor) || - hasGreaterOrEqualCubinVersion(major, minor); + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, + std::greater_equal()); +} + + +CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) +{ + return ::compare(CUDA_ARCH_BIN, major * 10 + minor, + std::greater_equal()); } @@ -218,12 +228,12 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) getComputeCapability(device, major, minor); // Check PTX compatibility - if (hasLessOrEqualPtxVersion(major, minor)) + if (TargetArchs::hasEqualOrLessPtx(major, minor)) return true; // Check CUBIN compatibility for (int i = minor; i >= 0; --i) - if (hasCubinVersion(major, i)) + if (TargetArchs::hasBin(major, i)) return true; return false; diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 1b5f79e..3246785 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -170,7 +170,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) ensureSizeIsEnough(buf_size, CV_8U, buf); Caller* callers = multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = 
singlepass_callers; Caller caller = callers[src.depth()]; @@ -206,7 +206,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) sqrSumCaller, sqrSumCaller, 0 }; Caller* callers = multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = singlepass_callers; Size buf_size; @@ -283,7 +283,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size())); - CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) && hasNativeDoubleSupport(getDevice()))); double minVal_; if (!minVal) minVal = &minVal_; @@ -296,7 +296,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp if (mask.empty()) { Caller* callers = multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = singlepass_callers; Caller caller = callers[src.type()]; @@ -306,7 +306,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp else { MaskedCaller* callers = masked_multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = masked_singlepass_callers; MaskedCaller caller = callers[src.type()]; @@ -382,7 +382,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size())); - CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) && hasNativeDoubleSupport(getDevice()))); double minVal_; if (!minVal) minVal = &minVal_; @@ -399,7 +399,7 @@ 
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point if (mask.empty()) { Caller* callers = multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = singlepass_callers; Caller caller = callers[src.type()]; @@ -409,7 +409,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point else { MaskedCaller* callers = masked_multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = masked_singlepass_callers; MaskedCaller caller = callers[src.type()]; @@ -463,7 +463,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) CV_Assert(src.channels() == 1); - CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && + CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) && hasNativeDoubleSupport(getDevice()))); Size buf_size; @@ -471,7 +471,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ensureSizeIsEnough(buf_size, CV_8U, buf); Caller* callers = multipass_callers; - if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice())) + if (TargetArchs::builtWith(ATOMICS) && hasAtomicsSupport(getDevice())) callers = singlepass_callers; Caller caller = callers[src.type()]; diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpu/src/split_merge.cpp index 9a21200..b3b2fa4 100644 --- a/modules/gpu/src/split_merge.cpp +++ b/modules/gpu/src/split_merge.cpp @@ -72,8 +72,8 @@ namespace cv { namespace gpu { namespace split_merge { CV_Assert(src); CV_Assert(n > 0); - - bool double_ok = hasGreaterOrEqualVersion(1, 3) && + + bool double_ok = TargetArchs::builtWith(NATIVE_DOUBLE) && hasNativeDoubleSupport(getDevice()); CV_Assert(src[0].depth() != CV_64F || double_ok); @@ -116,7 +116,7 @@ namespace cv { namespace gpu { namespace split_merge { 
CV_Assert(dst); - bool double_ok = hasGreaterOrEqualVersion(1, 3) && + bool double_ok = TargetArchs::builtWith(NATIVE_DOUBLE) && hasNativeDoubleSupport(getDevice()); CV_Assert(src.depth() != CV_64F || double_ok); diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp index 3bc0e73..338b8e3 100644 --- a/tests/gpu/src/arithm.cpp +++ b/tests/gpu/src/arithm.cpp @@ -659,7 +659,7 @@ struct CV_GpuMinMaxTest: public CvTest { try { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); int depth_end = double_ok ? CV_64F : CV_32F; @@ -793,7 +793,7 @@ struct CV_GpuMinMaxLocTest: public CvTest { try { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); int depth_end = double_ok ? CV_64F : CV_32F; diff --git a/tests/gpu/src/bitwise_oper.cpp b/tests/gpu/src/bitwise_oper.cpp index 15428d8..a3379b3 100644 --- a/tests/gpu/src/bitwise_oper.cpp +++ b/tests/gpu/src/bitwise_oper.cpp @@ -59,7 +59,7 @@ struct CV_GpuBitwiseTest: public CvTest { int rows, cols; - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); int depth_end = double_ok ? 
CV_64F : CV_32F; diff --git a/tests/gpu/src/match_template.cpp b/tests/gpu/src/match_template.cpp index b5dd9e4..431d547 100644 --- a/tests/gpu/src/match_template.cpp +++ b/tests/gpu/src/match_template.cpp @@ -64,7 +64,7 @@ struct CV_GpuMatchTemplateTest: CvTest { try { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); if (!double_ok) { @@ -244,7 +244,7 @@ struct CV_GpuMatchTemplateFindPatternInBlackTest: CvTest { try { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); if (!double_ok) { diff --git a/tests/gpu/src/meanshift.cpp b/tests/gpu/src/meanshift.cpp index a6338f9..7e19a37 100644 --- a/tests/gpu/src/meanshift.cpp +++ b/tests/gpu/src/meanshift.cpp @@ -59,7 +59,7 @@ struct CV_GpuMeanShiftTest : public CvTest int major, minor; cv::gpu::getComputeCapability(cv::gpu::getDevice(), major, minor); - if (cv::gpu::hasGreaterOrEqualVersion(2, 0) && major >= 2) + if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && major >= 2) img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png"); else img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png"); @@ -205,7 +205,7 @@ struct CV_GpuMeanShiftProcTest : public CvTest int major, minor; cv::gpu::getComputeCapability(cv::gpu::getDevice(), major, minor); - if (cv::gpu::hasGreaterOrEqualVersion(2, 0) && major >= 2) + if (cv::gpu::TargetArchs::hasEqualOrGreater(2, 0) && major >= 2) fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ); else fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ); diff --git a/tests/gpu/src/mssegmentation.cpp b/tests/gpu/src/mssegmentation.cpp index 70093c6..268cdb1 100644 --- a/tests/gpu/src/mssegmentation.cpp 
+++ b/tests/gpu/src/mssegmentation.cpp @@ -71,7 +71,7 @@ struct CV_GpuMeanShiftSegmentationTest : public CvTest { { stringstream path; path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize; - if (cv::gpu::hasGreaterOrEqualVersion(2, 0) && major >= 2) + if (TargetArchs::hasEqualOrGreater(2, 0) && major >= 2) path << ".png"; else path << "_CC1X.png"; diff --git a/tests/gpu/src/split_merge.cpp b/tests/gpu/src/split_merge.cpp index 2a929f6..6a09dff 100644 --- a/tests/gpu/src/split_merge.cpp +++ b/tests/gpu/src/split_merge.cpp @@ -63,7 +63,7 @@ struct CV_MergeTest : public CvTest void CV_MergeTest::can_merge(size_t rows, size_t cols) { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); size_t depth_end = double_ok ? CV_64F : CV_32F; @@ -105,7 +105,7 @@ void CV_MergeTest::can_merge(size_t rows, size_t cols) void CV_MergeTest::can_merge_submatrixes(size_t rows, size_t cols) { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); size_t depth_end = double_ok ? CV_64F : CV_32F; @@ -179,7 +179,7 @@ struct CV_SplitTest : public CvTest void CV_SplitTest::can_split(size_t rows, size_t cols) { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); size_t depth_end = double_ok ? CV_64F : CV_32F; @@ -221,7 +221,7 @@ void CV_SplitTest::can_split(size_t rows, size_t cols) void CV_SplitTest::can_split_submatrix(size_t rows, size_t cols) { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); size_t depth_end = double_ok ? 
CV_64F : CV_32F; @@ -292,7 +292,7 @@ struct CV_SplitMergeTest : public CvTest }; void CV_SplitMergeTest::can_split_merge(size_t rows, size_t cols) { - bool double_ok = gpu::hasGreaterOrEqualVersion(1, 3) && + bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && gpu::hasNativeDoubleSupport(gpu::getDevice()); size_t depth_end = double_ok ? CV_64F : CV_32F; -- 2.7.4