message(STATUS "CUDA detected: " ${CUDA_VERSION})\r
\r
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")\r
- set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})\r
-\r
- set(CUDA_NVCC_FLAGS_NUM "")\r
-\r
- while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")\r
- string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})\r
- string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})\r
- string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})\r
- list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})\r
- string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})\r
- string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)\r
- endwhile()\r
-\r
- set (OpenCV_CUDA_CC "")\r
- set (loop_var "")\r
- foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)\r
- set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})\r
- set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})\r
- endforeach()\r
-\r
- ### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})\r
- endif()\r
+ set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})\r
+ \r
+ # For every compute capability known to this script, record whether it was\r
+ # requested in CUDA_COMPUTE_CAPABILITIES:\r
+ #   NVIDIA_CC_<XY>              - the matched "X.Y" text, or empty if absent\r
+ #   HAVE_PTX_FOR_NVIDIA_CC_<XY> - 1 if "X.Y" was requested, 0 otherwise\r
+ # The capability string is quoted so that an empty cache value is still passed\r
+ # to string(REGEX MATCH) as an (empty) input instead of as a missing argument,\r
+ # which would abort the configuration.\r
+ foreach(cuda_cc 1.0 1.1 1.2 1.3 2.0 2.1)\r
+   # "1.0" -> "10": suffix used in the variable names.\r
+   string(REPLACE "." "" cuda_cc_id "${cuda_cc}")\r
+   # "1.0" -> "1\.0": match a literal dot, not any character.\r
+   string(REPLACE "." "\\." cuda_cc_regex "${cuda_cc}")\r
+   string(REGEX MATCH "${cuda_cc_regex}" NVIDIA_CC_${cuda_cc_id} "${CUDA_COMPUTE_CAPABILITIES}")\r
+   string(COMPARE EQUAL "${cuda_cc}" "${NVIDIA_CC_${cuda_cc_id}}" HAVE_PTX_FOR_NVIDIA_CC_${cuda_cc_id})\r
+ endforeach()\r
+\r
+ # Translate every "X.Y" entry of CUDA_NVCC_FLAGS_ARCH into its "XY" form and\r
+ # collect the results in CUDA_NVCC_FLAGS_NUM, in order of first appearance and\r
+ # without duplicates.\r
+ # The dot in the pattern is escaped so that only real "X.Y" tokens are\r
+ # recognised, and the input is quoted so that an empty or unparsable value\r
+ # simply yields an empty list instead of a configure-time error.\r
+ set(CUDA_NVCC_FLAGS_NUM "")\r
+ string(REGEX MATCHALL "[0-9]+\\.[0-9]+" cuda_cc_tokens "${CUDA_NVCC_FLAGS_ARCH}")\r
+ foreach(cuda_cc_token IN LISTS cuda_cc_tokens)\r
+   string(REPLACE "." "" cuda_cc_num "${cuda_cc_token}")\r
+   list(APPEND CUDA_NVCC_FLAGS_NUM ${cuda_cc_num})\r
+ endforeach()\r
+ if(NOT "${CUDA_NVCC_FLAGS_NUM}" STREQUAL "")\r
+   list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS_NUM)\r
+ endif()\r
+ # All entries have been consumed; leave the scratch variable empty.\r
+ set(CUDA_NVCC_FLAGS_ARCH "")\r
+\r
+ # Add one "-gencode arch=compute_XY,code=sm_XY" option pair per collected\r
+ # capability to CUDA_NVCC_FLAGS, and gather the same options in OpenCV_CUDA_CC.\r
+ set(OpenCV_CUDA_CC "")\r
+ foreach(cuda_cc_num IN LISTS CUDA_NVCC_FLAGS_NUM)\r
+   set(cuda_gencode -gencode arch=compute_${cuda_cc_num},code=sm_${cuda_cc_num})\r
+   list(APPEND CUDA_NVCC_FLAGS ${cuda_gencode})\r
+   list(APPEND OpenCV_CUDA_CC ${cuda_gencode})\r
+ endforeach()\r
+\r
+ ### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})\r
+ endif()\r
endif()\r
\r
\r
/* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA
+/* The project was generated with 1.0 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_10
+
+/* The project was generated with 1.1 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_11
+
+/* The project was generated with 1.2 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_12
+
+/* The project was generated with 1.3 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_13
+
+/* The project was generated with 2.0 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_20
+
+/* The project was generated with 2.1 NVIDIA device arch support */
+#cmakedefine HAVE_PTX_FOR_NVIDIA_CC_21
+
/* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT
\cvCppFunc{gpu::hasNativeDoubleSupport}
-Returns true if the specified GPU has native double support, false otherwise.
+Returns true, if the specified GPU has native double support, otherwise false.
\cvdefCpp{bool hasNativeDoubleSupport(int device);}
\begin{description}
\cvCppFunc{gpu::hasAtomicsSupport}
-Returns true if the specified GPU has atomics support, false otherwise.
+Returns true, if the specified GPU has atomics support, otherwise false.
\cvdefCpp{bool hasAtomicsSupport(int device);}
\begin{description}
\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
-\end{description}
\ No newline at end of file
+\end{description}
+
+
+\cvCppFunc{gpu::hasPtxFor}
+Returns true if the GPU module was built with PTX support for the given compute capability, false otherwise.
+
+\cvdefCpp{bool hasPtxFor(int major, int minor);}
+\begin{description}
+\cvarg{major}{Major CC version.}
+\cvarg{minor}{Minor CC version.}
+\end{description}
+
+
+\cvCppFunc{gpu::isCompatibleWith}
+Returns true if the GPU module is PTX compatible with the given NVIDIA GPU device, false otherwise.
+
+\cvdefCpp{bool isCompatibleWith(int device);}
+\begin{description}
+\cvarg{device}{GPU index. Can be obtained via \cvCppCross{gpu::getDevice}.}
+\end{description}
+
+According to the CUDA C Programming Guide Version 3.2: ``PTX code produced for some specific compute capability can always be compiled to binary code of greater or equal compute capability''.
+
CV_EXPORTS bool hasNativeDoubleSupport(int device);\r
CV_EXPORTS bool hasAtomicsSupport(int device);\r
\r
+ //! Checks if the GPU module was built with PTX support (-arch) of the given CC (compute capability major.minor); returns false for any version the build was not configured for\r
+ CV_EXPORTS bool hasPtxFor(int major, int minor);\r
+\r
+ //! Checks if the GPU module is PTX compatible with the given NVIDIA device, i.e. hasPtxFor() holds for the device's compute capability or for a lower one\r
+ CV_EXPORTS bool isCompatibleWith(int device);\r
+\r
//////////////////////////////// Error handling ////////////////////////\r
\r
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);\r
return major > 1 || (major == 1 && minor >= 1);\r
}\r
\r
+// Tells whether the macro HAVE_PTX_FOR_NVIDIA_CC_<major><minor> was defined when\r
+// this file was compiled. Only the versions 1.0, 1.1, 1.2, 1.3, 2.0 and 2.1 are\r
+// known here; every other (major, minor) pair yields false.\r
+CV_EXPORTS bool cv::gpu::hasPtxFor(int major, int minor)\r
+{\r
+    switch (major)\r
+    {\r
+    case 1:\r
+        switch (minor)\r
+        {\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_10\r
+        case 0: return true;\r
+#endif\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_11\r
+        case 1: return true;\r
+#endif\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_12\r
+        case 2: return true;\r
+#endif\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_13\r
+        case 3: return true;\r
+#endif\r
+        default: break;\r
+        }\r
+        break;\r
+\r
+    case 2:\r
+        switch (minor)\r
+        {\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_20\r
+        case 0: return true;\r
+#endif\r
+#ifdef HAVE_PTX_FOR_NVIDIA_CC_21\r
+        case 1: return true;\r
+#endif\r
+        default: break;\r
+        }\r
+        break;\r
+\r
+    default:\r
+        break;\r
+    }\r
+\r
+    // Either the version is unknown or the matching macro was not defined.\r
+    return false;\r
+}\r
+\r
+\r
+// Tells whether hasPtxFor() reports true for the compute capability of the\r
+// given device or for any lower one.\r
+//\r
+// The definition is qualified with cv::gpu:: (like hasPtxFor above) so that it\r
+// defines that function rather than an unrelated one in the global namespace.\r
+//\r
+// device - GPU index, passed on to getComputeCapability().\r
+CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)\r
+{\r
+    // According to the CUDA C Programming Guide Version 3.2: "PTX code\r
+    // produced for some specific compute capability can always be compiled to\r
+    // binary code of greater or equal compute capability".\r
+\r
+    // Start from defined values in case the query leaves them untouched.\r
+    int major = 0, minor = 0;\r
+    getComputeCapability(device, major, minor);\r
+\r
+    // Walk downwards from the device's own version: first the remaining minor\r
+    // versions of the current major, then every lower major version.\r
+    for (; major >= 1; --major)\r
+    {\r
+        for (; minor >= 0; --minor)\r
+        {\r
+            if (hasPtxFor(major, minor))\r
+                return true;\r
+        }\r
+        // For the next lower major version, begin at minor version 9.\r
+        minor = 9;\r
+    }\r
+\r
+    return false;\r
+}\r
+\r
#endif\r
\r