Avoid using heavy API to query a single attribute (#3179)
author ghostplant <ghostplant@qq.com>
Tue, 14 May 2019 17:22:33 +0000 (01:22 +0800)
committer Tianqi Chen <tqchen@users.noreply.github.com>
Tue, 14 May 2019 17:22:33 +0000 (10:22 -0700)
src/codegen/opt/build_cuda_on.cc
src/runtime/cuda/cuda_device_api.cc

index fda239f..e2a788f 100644 (file)
@@ -84,12 +84,13 @@ std::string NVRTCCompile(const std::string& code, bool include_path = false) {
   std::vector<std::string> compile_params;
   std::vector<const char*> param_cstrings{};
   nvrtcProgram prog;
-  cudaDeviceProp device_prop;
   std::string cc = "30";
-  cudaError_t e = cudaGetDeviceProperties(&device_prop, 0);
+  int major, minor;
+  cudaError_t e1 = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 0);
+  cudaError_t e2 = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, 0);
 
-  if (e == cudaSuccess) {
-    cc = std::to_string(device_prop.major) + std::to_string(device_prop.minor);
+  if (e1 == cudaSuccess && e2 == cudaSuccess) {
+    cc = std::to_string(major) + std::to_string(minor);
   } else {
     LOG(WARNING) << "cannot detect compute capability from your device, "
                  << "fall back to compute_30.";
index f812156..f5d660c 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <dmlc/thread_local.h>
 #include <tvm/runtime/registry.h>
+#include <cuda.h>
 #include <cuda_runtime.h>
 #include "cuda_common.h"
 
@@ -73,9 +74,10 @@ class CUDADeviceAPI final : public DeviceAPI {
         return;
       }
       case kDeviceName: {
-        cudaDeviceProp props;
-        CUDA_CALL(cudaGetDeviceProperties(&props, ctx.device_id));
-        *rv = std::string(props.name);
+        std::string name(256, 0);
+        CUDA_DRIVER_CALL(cuDeviceGetName(&name[0], name.size(), ctx.device_id));
+        name.resize(strlen(name.c_str()));
+        *rv = std::move(name);
         return;
       }
       case kMaxClockRate: {