From 1a0d41fb530d87179da15a26d105f05f7f591cf4 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Tue, 18 Jan 2011 14:52:35 +0000 Subject: [PATCH] added checkPtxVersion into gpu module --- CMakeLists.txt | 24 ++++---- cvconfig.h.cmake | 12 ++-- doc/gpu_initialization.tex | 13 ++++- modules/gpu/include/opencv2/gpu/gpu.hpp | 4 +- modules/gpu/src/initialization.cpp | 100 ++++++++++++++++++++++++-------- 5 files changed, 107 insertions(+), 46 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0deabb1..b8a4409 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -709,19 +709,19 @@ if (WITH_CUDA) set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability") set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "1\\.0" NVIDIA_CC_10 ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "1\\.1" NVIDIA_CC_11 ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "1\\.2" NVIDIA_CC_12 ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "1\\.3" NVIDIA_CC_13 ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "2\\.0" NVIDIA_CC_20 ${CUDA_COMPUTE_CAPABILITIES}) - string(REGEX MATCH "2\\.1" NVIDIA_CC_21 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 ${CUDA_COMPUTE_CAPABILITIES}) + string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 ${CUDA_COMPUTE_CAPABILITIES}) - string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10) - string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11) - string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12) - string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13) - string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20) - string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21) + string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10) + string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11) + string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12) + string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13) + string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20) + string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21) set(CUDA_NVCC_FLAGS_NUM "") diff --git a/cvconfig.h.cmake b/cvconfig.h.cmake index 9a9a6a3..b1de832 100644 --- a/cvconfig.h.cmake +++ b/cvconfig.h.cmake @@ -164,22 +164,22 @@ #cmakedefine HAVE_CUDA /* The project was generated with 1.0 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10 +#cmakedefine OPENCV_GPU_CUDA_ARCH_10 /* The project was generated with 1.1 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11 +#cmakedefine OPENCV_GPU_CUDA_ARCH_11 /* The project was generated with 1.2 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12 +#cmakedefine OPENCV_GPU_CUDA_ARCH_12 /* The project was generated with 1.3 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13 +#cmakedefine OPENCV_GPU_CUDA_ARCH_13 /* The project was generated with 2.0 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20 +#cmakedefine OPENCV_GPU_CUDA_ARCH_20 /* The project was generated with 2.1 NVIDIA device arch support */ -#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21 +#cmakedefine OPENCV_GPU_CUDA_ARCH_21 /* VideoInput library */ #cmakedefine HAVE_VIDEOINPUT diff --git a/doc/gpu_initialization.tex b/doc/gpu_initialization.tex index 7c341ab..f4f8cac 100644 --- a/doc/gpu_initialization.tex +++ b/doc/gpu_initialization.tex @@ -69,11 +69,20 @@ Returns true, if the specified GPU has atomics support, otherwise false. \end{description} -\cvCppFunc{gpu::hasPtxFor} +\cvCppFunc{gpu::checkPtxVersion} Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false. -\cvdefCpp{bool hasPtxFor(int major, int minor);} +\cvdefCpp{template $<$unsigned int cmp\_op$>$\newline +bool checkPtxVersion(int major, int minor);} \begin{description} +\cvarg{cmp\_op}{Comparison operation: +\begin{description} +\cvarg{CMP\_EQ}{Return true, if at least one of GPU module PTX versions matches the given one, otherwise false} +\cvarg{CMP\_LT}{Return true, if at least one of GPU module PTX versions is less than the given one, otherwise false} +\cvarg{CMP\_LE}{Return true, if at least one of GPU module PTX versions is less or equal to the given one, otherwise false} +\cvarg{CMP\_GT}{Return true, if at least one of GPU module PTX versions is greater than the given one, otherwise false} +\cvarg{CMP\_GE}{Return true, if at least one of GPU module PTX versions is greater or equal to the given one, otherwise false} +\end{description}} \cvarg{major}{Major CC version.} \cvarg{minor}{Minor CC version.} \end{description} diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 7618a13..61f5d66 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -72,8 +72,8 @@ namespace cv CV_EXPORTS bool hasNativeDoubleSupport(int device); CV_EXPORTS bool hasAtomicsSupport(int device); - //! Checks if the GPU module was built with PTX support (-arch) of the given CC - CV_EXPORTS bool hasPtxFor(int major, int minor); + template + CV_EXPORTS bool checkPtxVersion(int major, int minor); //! Checks if the GPU module is PTX compatible with the given NVIDIA device CV_EXPORTS bool isCompatibleWith(int device); diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 0ce9f6a..a21fef4 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -68,6 +68,7 @@ CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() return count; } + CV_EXPORTS string cv::gpu::getDeviceName(int device) { cudaDeviceProp prop; @@ -75,10 +76,13 @@ CV_EXPORTS string cv::gpu::getDeviceName(int device) return prop.name; } + CV_EXPORTS void cv::gpu::setDevice(int device) { cudaSafeCall( cudaSetDevice( device ) ); } + + CV_EXPORTS int cv::gpu::getDevice() { int device; @@ -86,6 +90,7 @@ CV_EXPORTS int cv::gpu::getDevice() return device; } + CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor) { cudaDeviceProp prop; @@ -95,6 +100,7 @@ CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor minor = prop.minor; } + CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) { cudaDeviceProp prop; @@ -108,6 +114,7 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& free, size_t& total) cudaSafeCall( cudaMemGetInfo( &free, &total ) ); } + CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device) { int major, minor; @@ -115,6 +122,7 @@ CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device) return major > 1 || (major == 1 && minor >= 3); } + CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device) { int major, minor; @@ -122,36 +130,90 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device) return major > 1 || (major == 1 && minor >= 1); } -CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor) + +namespace +{ + template + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2); + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 == rhs1 && lhs2 == rhs2; + } + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 > rhs2); + } + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2); + } + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 < rhs2); + } + + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2); + } + + template <> + bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2) + { + return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2); + } +} + + +template +CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor) { -#ifdef HAVE_PTX_FOR_NVIDIA_CC_10 - if (major == 1 && minor == 0) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_10 + if (comparePairs(1, 0, major, minor)) return true; #endif -#ifdef HAVE_PTX_FOR_NVIDIA_CC_11 - if (major == 1 && minor == 1) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_11 + if (comparePairs(1, 1, major, minor)) return true; #endif -#ifdef HAVE_PTX_FOR_NVIDIA_CC_12 - if (major == 1 && minor == 2) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_12 + if (comparePairs(1, 2, major, minor)) return true; #endif -#ifdef HAVE_PTX_FOR_NVIDIA_CC_13 - if (major == 1 && minor == 3) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_13 + if (comparePairs(1, 3, major, minor)) return true; #endif -#ifdef HAVE_PTX_FOR_NVIDIA_CC_20 - if (major == 2 && minor == 0) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_20 + if (comparePairs(2, 0, major, minor)) return true; #endif -#ifdef HAVE_PTX_FOR_NVIDIA_CC_21 - if (major == 2 && minor == 1) return true; +#ifdef OPENCV_GPU_CUDA_ARCH_21 + if (comparePairs(2, 1, major, minor)) return true; #endif return false; } +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); +template CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor); + + CV_EXPORTS bool isCompatibleWith(int device) { // According to the CUDA C Programming Guide Version 3.2: "PTX code @@ -161,17 +223,7 @@ CV_EXPORTS bool isCompatibleWith(int device) int major, minor; getComputeCapability(device, major, minor); - for (; major >= 1; --major) - { - for (; minor >= 0; --minor) - { - if (hasPtxFor(major, minor)) - return true; - } - minor = 9; - } - - return false; + return checkPtxVersion(major, minor); } #endif -- 2.7.4