set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")\r
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})\r
\r
- string(REGEX MATCH "1\\.0" NVIDIA_CC_10 ${CUDA_COMPUTE_CAPABILITIES})\r
- string(REGEX MATCH "1\\.1" NVIDIA_CC_11 ${CUDA_COMPUTE_CAPABILITIES})\r
- string(REGEX MATCH "1\\.2" NVIDIA_CC_12 ${CUDA_COMPUTE_CAPABILITIES})\r
- string(REGEX MATCH "1\\.3" NVIDIA_CC_13 ${CUDA_COMPUTE_CAPABILITIES})\r
- string(REGEX MATCH "2\\.0" NVIDIA_CC_20 ${CUDA_COMPUTE_CAPABILITIES})\r
- string(REGEX MATCH "2\\.1" NVIDIA_CC_21 ${CUDA_COMPUTE_CAPABILITIES})\r
+ # Quote the capability list: if the cache value is empty, an unquoted expansion\r
+ # drops the input argument entirely and string(REGEX MATCH) fails to parse.\r
+ string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 "${CUDA_COMPUTE_CAPABILITIES}")\r
+ string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 "${CUDA_COMPUTE_CAPABILITIES}")\r
+ string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 "${CUDA_COMPUTE_CAPABILITIES}")\r
+ string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 "${CUDA_COMPUTE_CAPABILITIES}")\r
+ string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 "${CUDA_COMPUTE_CAPABILITIES}")\r
+ string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 "${CUDA_COMPUTE_CAPABILITIES}")\r
\r
- string(COMPARE EQUAL "1.0" "${NVIDIA_CC_10}" HAVE_PTX_FOR_NVIDIA_CC_10)\r
- string(COMPARE EQUAL "1.1" "${NVIDIA_CC_11}" HAVE_PTX_FOR_NVIDIA_CC_11)\r
- string(COMPARE EQUAL "1.2" "${NVIDIA_CC_12}" HAVE_PTX_FOR_NVIDIA_CC_12)\r
- string(COMPARE EQUAL "1.3" "${NVIDIA_CC_13}" HAVE_PTX_FOR_NVIDIA_CC_13)\r
- string(COMPARE EQUAL "2.0" "${NVIDIA_CC_20}" HAVE_PTX_FOR_NVIDIA_CC_20)\r
- string(COMPARE EQUAL "2.1" "${NVIDIA_CC_21}" HAVE_PTX_FOR_NVIDIA_CC_21)\r
+ string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10)\r
+ string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11)\r
+ string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12)\r
+ string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13)\r
+ string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20)\r
+ string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21)\r
\r
set(CUDA_NVCC_FLAGS_NUM "")\r
\r
#cmakedefine HAVE_CUDA
/* The project was generated with 1.0 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
+#cmakedefine OPENCV_GPU_CUDA_ARCH_10
/* The project was generated with 1.1 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
+#cmakedefine OPENCV_GPU_CUDA_ARCH_11
/* The project was generated with 1.2 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
+#cmakedefine OPENCV_GPU_CUDA_ARCH_12
/* The project was generated with 1.3 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
+#cmakedefine OPENCV_GPU_CUDA_ARCH_13
/* The project was generated with 2.0 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
+#cmakedefine OPENCV_GPU_CUDA_ARCH_20
/* The project was generated with 2.1 NVIDIA device arch support */
-#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
+#cmakedefine OPENCV_GPU_CUDA_ARCH_21
/* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT
\end{description}
-\cvCppFunc{gpu::hasPtxFor}
+\cvCppFunc{gpu::checkPtxVersion}
Returns true if the GPU module was built with PTX support for at least one compute capability that satisfies the comparison cmp\_op against the given one, otherwise false.
-\cvdefCpp{bool hasPtxFor(int major, int minor);}
+\cvdefCpp{template $<$unsigned int cmp\_op$>$\newline
+bool checkPtxVersion(int major, int minor);}
\begin{description}
+\cvarg{cmp\_op}{Comparison operation:
+\begin{description}
+\cvarg{CMP\_EQ}{Return true, if at least one of GPU module PTX versions matches the given one, otherwise false}
+\cvarg{CMP\_LT}{Return true, if at least one of GPU module PTX versions is less than the given one, otherwise false}
+\cvarg{CMP\_LE}{Return true, if at least one of GPU module PTX versions is less than or equal to the given one, otherwise false}
+\cvarg{CMP\_GT}{Return true, if at least one of GPU module PTX versions is greater than the given one, otherwise false}
+\cvarg{CMP\_GE}{Return true, if at least one of GPU module PTX versions is greater than or equal to the given one, otherwise false}
+\end{description}}
\cvarg{major}{Major CC version.}
\cvarg{minor}{Minor CC version.}
\end{description}
return count;\r
}\r
\r
+\r
CV_EXPORTS string cv::gpu::getDeviceName(int device)\r
{\r
cudaDeviceProp prop;\r
return prop.name;\r
}\r
\r
+\r
// Passes `device` to cudaSetDevice(); the call is wrapped in cudaSafeCall.\r
CV_EXPORTS void cv::gpu::setDevice(int device)\r
{\r
cudaSafeCall( cudaSetDevice( device ) );\r
}\r
+\r
+\r
CV_EXPORTS int cv::gpu::getDevice()\r
{\r
int device; \r
return device;\r
}\r
\r
+\r
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor)\r
{\r
cudaDeviceProp prop; \r
minor = prop.minor;\r
}\r
\r
+\r
CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)\r
{\r
cudaDeviceProp prop;\r
cudaSafeCall( cudaMemGetInfo( &free, &total ) );\r
}\r
\r
+\r
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device)\r
{\r
int major, minor;\r
return major > 1 || (major == 1 && minor >= 3);\r
}\r
\r
+\r
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device) \r
{\r
int major, minor;\r
return major > 1 || (major == 1 && minor >= 1);\r
}\r
\r
-CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor) \r
+\r
+namespace\r
+{\r
+    // Compares the pair (lhs1, lhs2) with the pair (rhs1, rhs2) using the\r
+    // relation selected by cmp_op. The first element of each pair is the more\r
+    // significant one, so the ordering relations are lexicographic.\r
+    // The primary template is only declared; each supported cmp_op is provided\r
+    // as an explicit specialization below.\r
+    template <unsigned int cmp_op>\r
+    bool comparePairs(int lhs1, int lhs2, int rhs1, int rhs2);\r
+\r
+    // (lhs1, lhs2) == (rhs1, rhs2)\r
+    template <>\r
+    bool comparePairs<CMP_EQ>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 == rhs1 && lhs2 == rhs2;\r
+    }\r
+\r
+    // (lhs1, lhs2) > (rhs1, rhs2)\r
+    template <>\r
+    bool comparePairs<CMP_GT>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 > rhs2);\r
+    }\r
+\r
+    // (lhs1, lhs2) >= (rhs1, rhs2)\r
+    template <>\r
+    bool comparePairs<CMP_GE>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);\r
+    }\r
+\r
+    // (lhs1, lhs2) < (rhs1, rhs2)\r
+    template <>\r
+    bool comparePairs<CMP_LT>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 < rhs2);\r
+    }\r
+\r
+    // (lhs1, lhs2) <= (rhs1, rhs2)\r
+    template <>\r
+    bool comparePairs<CMP_LE>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);\r
+    }\r
+\r
+    // (lhs1, lhs2) != (rhs1, rhs2): the pairs differ as soon as either\r
+    // component differs. (Previously this duplicated the CMP_LE body.)\r
+    template <>\r
+    bool comparePairs<CMP_NE>(int lhs1, int lhs2, int rhs1, int rhs2)\r
+    {\r
+        return lhs1 != rhs1 || lhs2 != rhs2;\r
+    }\r
+}\r
+\r
+\r
+// Returns true if at least one compute capability enabled at build time (one\r
+// OPENCV_GPU_CUDA_ARCH_xx macro per capability) relates to (major, minor) as\r
+// cmp_op specifies, with the built capability as the left-hand operand of the\r
+// comparison; returns false otherwise.\r
+template <unsigned int cmp_op>\r
+CV_EXPORTS bool cv::gpu::checkPtxVersion(int major, int minor) \r
{\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_10\r
- if (major == 1 && minor == 0) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_10\r
+ if (comparePairs<cmp_op>(1, 0, major, minor)) return true;\r
#endif\r
\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_11\r
- if (major == 1 && minor == 1) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_11\r
+ if (comparePairs<cmp_op>(1, 1, major, minor)) return true;\r
#endif\r
\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_12\r
- if (major == 1 && minor == 2) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_12\r
+ if (comparePairs<cmp_op>(1, 2, major, minor)) return true;\r
#endif\r
\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_13\r
- if (major == 1 && minor == 3) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_13\r
+ if (comparePairs<cmp_op>(1, 3, major, minor)) return true;\r
#endif\r
\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_20\r
- if (major == 2 && minor == 0) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_20\r
+ if (comparePairs<cmp_op>(2, 0, major, minor)) return true;\r
#endif\r
\r
-#ifdef HAVE_PTX_FOR_NVIDIA_CC_21\r
- if (major == 2 && minor == 1) return true;\r
+#ifdef OPENCV_GPU_CUDA_ARCH_21\r
+ if (comparePairs<cmp_op>(2, 1, major, minor)) return true;\r
#endif\r
\r
return false;\r
}\r
\r
\r
+// Explicit instantiations of checkPtxVersion for the supported comparison operations.\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_EQ>(int major, int minor);\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GT>(int major, int minor);\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_GE>(int major, int minor);\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LT>(int major, int minor);\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_LE>(int major, int minor);\r
+template CV_EXPORTS bool cv::gpu::checkPtxVersion<CMP_NE>(int major, int minor);\r
+\r
+\r
// Returns true if the module was built with PTX for at least one compute\r
// capability that is less than or equal to the compute capability of `device`.\r
// Defined with the cv::gpu:: qualifier, like the other functions in this file,\r
// so that it defines the namespace member rather than a new global function.\r
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)\r
{\r
// According to the CUDA C Programming Guide Version 3.2: "PTX code \r
int major, minor;\r
getComputeCapability(device, major, minor);\r
\r
- for (; major >= 1; --major)\r
- {\r
- for (; minor >= 0; --minor)\r
- {\r
- if (hasPtxFor(major, minor))\r
- return true;\r
- }\r
- minor = 9;\r
- }\r
-\r
- return false;\r
+ return checkPtxVersion<CMP_LE>(major, minor);\r
}\r
\r
#endif\r