added CUDA_ARCH_FEATURES define (for correct handling of BIN(PTX) cases)
author Alexey Spizhevoy <no@email>
Tue, 1 Feb 2011 12:28:39 +0000 (12:28 +0000)
committer Alexey Spizhevoy <no@email>
Tue, 1 Feb 2011 12:28:39 +0000 (12:28 +0000)
CMakeLists.txt
modules/gpu/src/initialization.cpp

index df8e9a216561175b4ca9888760d591366f728140..aebdcd296736d7924b6bdcac2e8bc863b8d23bf9 100644 (file)
@@ -711,44 +711,49 @@ if(WITH_CUDA)
         set(CUDA_ARCH_BIN "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")\r
         set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")              \r
         \r
-        # These variables are used in config templates\r
         string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")\r
         string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")\r
         \r
-        # Ckeck if user specified 1.0 compute capability\r
+        # Check if user specified 1.0 compute capability: we don't support it\r
         string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}")\r
         set(CUDA_ARCH_BIN_OR_PTX_10 0)\r
         if(NOT ${HAS_ARCH_10} STREQUAL "")\r
             set(CUDA_ARCH_BIN_OR_PTX_10 1)\r
         endif()\r
         \r
-        # Flags to be set\r
+        # NVCC flags to be set\r
         set(NVCC_FLAGS_EXTRA "")      \r
         \r
-        # These variables are passed into the template\r
+        # These vars will be passed into the templates\r
         set(OPENCV_CUDA_ARCH_BIN "") \r
         set(OPENCV_CUDA_ARCH_PTX "") \r
+        set(OPENCV_CUDA_ARCH_FEATURES "")        \r
         \r
-        # Tell nvcc to add binaries for the specified GPUs\r
+        # Tell NVCC to add binaries for the specified GPUs\r
         string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")\r
         foreach(ARCH IN LISTS ARCH_LIST)\r
             if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")\r
+                # User explicitly specified PTX for the concrete BIN\r
                 set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})\r
                 set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")\r
+                set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}")\r
             else()\r
+                # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN                \r
                 set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})\r
                 set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")\r
+                set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")\r
             endif()\r
         endforeach()\r
         \r
-        # Tell nvcc to add PTX intermediate code for the specified architectures\r
+        # Tell NVCC to add PTX intermediate code for the specified architectures\r
         string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")\r
         foreach(ARCH IN LISTS ARCH_LIST)\r
             set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})\r
             set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")\r
-        endforeach()               \r
+            set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")\r
+        endforeach()\r
         \r
-        # Wil; be processed in other scripts\r
+        # These vars will be processed in other scripts\r
         set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})        \r
         set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")\r
         \r
index 9f2cc5d69d95e9784c02759fbb499cf5384a121b..d11b95c520ff18c60cd7300f9a47ed2e04afaa27 100644 (file)
@@ -72,9 +72,9 @@ namespace
 CV_EXPORTS bool cv::gpu::TargetArchs::builtWith(cv::gpu::GpuFeature feature)\r
 {\r
     if (feature == NATIVE_DOUBLE)\r
-        return hasEqualOrGreater(1, 3);\r
+        return ::compareToSet(CUDA_ARCH_FEATURES, 13, std::greater_equal<int>());\r
     if (feature == ATOMICS)\r
-        return hasEqualOrGreater(1, 1);\r
+        return ::compareToSet(CUDA_ARCH_FEATURES, 11, std::greater_equal<int>());\r
     return true;\r
 }\r
 \r