"caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
"caffe2/operators/pool_op_cudnn.cu",
'**/hip/**',
+ # These files are compatible with both cuda and hip
"aten/src/ATen/core/*",
+ "torch/csrc/autograd/engine.cpp"
]
json_settings = os.path.join(amd_build_dir, "disabled_features.json")
subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
# Make various replacements inside AMD_BUILD/torch directory
- ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
- "csrc/cuda/cuda_check.h"]
+ ignore_files = [
+      # These files use nvrtc; hip doesn't have an equivalent
+ "csrc/autograd/profiler.h",
+ "csrc/autograd/profiler.cpp",
+ "csrc/cuda/cuda_check.h",
+ # These files are compatible with both cuda and hip
+ "csrc/autograd/engine.cpp"
+ ]
for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
for filename in files:
if filename.endswith(".cpp") or filename.endswith(".h"):
#include <queue>
#include <TH/TH.h>
+#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <cuda.h>
+#endif // USE_CUDA
+#ifdef USE_ROCM
+#include <hip/hip_runtime.h>
+#endif // USE_ROCM
#include <THC/THC.h>
#include <ATen/cuda/CUDAGuard.h>
-#endif
+#endif // defined(USE_CUDA) || defined(USE_ROCM)
namespace torch { namespace autograd {
// not CUDA.
auto Engine::thread_init(int device) -> void {
THInferNumThreads();
-#ifdef USE_CUDA
+#if defined(USE_CUDA) || defined(USE_ROCM)
// NB: We MUST NOT construct the guard for device -1,
// as in some settings we compile with USE_CUDA, but
// have lazy stubs for CUDA functionality (so actually
auto Engine::start_threads() -> void {
int num_devices = 0;
#ifdef USE_CUDA
- // check for case of compiled with CUDA but no available devices
- if (cudaGetDeviceCount(&num_devices) != cudaSuccess) {
- cudaGetLastError();
- num_devices = 0;
+ {
+ int num_cuda_devices = 0;
+ // check for case of compiled with CUDA but no available devices
+ if (cudaGetDeviceCount(&num_cuda_devices) != cudaSuccess) {
+ cudaGetLastError();
+ } else {
+ num_devices += num_cuda_devices;
+ }
+ }
+#endif
+#ifdef USE_ROCM
+ {
+ int num_hip_devices = 0;
+      // check for case of compiled with ROCm but no available devices
+ if (hipGetDeviceCount(&num_hip_devices) != hipSuccess) {
+ hipGetLastError();
+ } else {
+ num_devices += num_hip_devices;
+ }
}
#endif
// One for CPU, plus one for every GPU device