Fix static linkage cases and NO_DISTRIBUTED=1 + CUDA (#16705) (#17337)
author Soumith Chintala <soumith@gmail.com>
Fri, 22 Feb 2019 00:05:16 +0000 (16:05 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 22 Feb 2019 00:12:02 +0000 (16:12 -0800)
Summary:
Attempt #2 (attempt 1 is https://github.com/pytorch/pytorch/pull/16705 and got reverted because of CI failures)

Fixes https://github.com/pytorch/pytorch/issues/14805
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17337

Differential Revision: D14175626

Pulled By: soumith

fbshipit-source-id: 66f2e10e219a1bf88ed342ec5c89da6f2994d8eb

c10/cuda/CUDAException.h
cmake/Dependencies.cmake
cmake/public/cuda.cmake

index aff5ee4..3836865 100644 (file)
@@ -14,6 +14,7 @@
   do {                                                     \
     cudaError_t __err = EXPR;                              \
     if (__err != cudaSuccess) {                            \
+      cudaGetLastError();                                 \
       AT_ERROR("CUDA error: ", cudaGetErrorString(__err)); \
     }                                                      \
   } while (0)
index 148c110..d4fd02f 100644 (file)
@@ -744,7 +744,12 @@ if(USE_CUDA)
       caffe2_update_option(USE_NVRTC OFF)
     endif()
     if(CAFFE2_USE_CUDNN)
-      list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn)
+      IF(CUDNN_STATIC_LINKAGE)
+       LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS
+         caffe2::cudnn "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" "dl")
+      ELSE()
+       list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn)
+      ENDIF()
     else()
       caffe2_update_option(USE_CUDNN OFF)
     endif()
@@ -1314,7 +1319,6 @@ if (NOT BUILD_ATEN_MOBILE)
     SET(AT_CUDA_ENABLED 0)
   else()
     SET(AT_CUDA_ENABLED 1)
-    find_package(CUDA 5.5 REQUIRED)
   endif()
 
   IF (NOT AT_CUDA_ENABLED OR NOT CUDNN_FOUND)
index 394c9f2..43665df 100644 (file)
@@ -9,6 +9,12 @@ endif()
 # release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
 
+ # we don't want to statically link cudart, because we rely on its dynamic linkage in
+ # python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
+ # Technically, we can link cudart here statically, and link libtorch_python.so
+ # to a dynamic libcudart.so, but that's just wasteful
+SET(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+
 # Find CUDA.
 find_package(CUDA)
 if(NOT CUDA_FOUND)
@@ -89,6 +95,9 @@ endif()
 
 if(DEFINED ENV{CUDNN_LIBRARY})
   set(CUDNN_LIBRARY $ENV{CUDNN_LIBRARY})
+  if (CUDNN_LIBRARY MATCHES ".*cudnn_static.a")
+    SET(CUDNN_STATIC_LINKAGE ON)
+  endif()
 else()
   find_library(CUDNN_LIBRARY ${CUDNN_LIBNAME}
     HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
@@ -186,7 +195,7 @@ add_library(caffe2::cudart INTERFACE IMPORTED)
 if(CAFFE2_STATIC_LINK_CUDA)
     set_property(
         TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES
-        "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a" rt)
+        "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a" rt dl)
 else()
     set_property(
         TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES