From 6651fae827fb6becd177fadb35c1913ca74950de Mon Sep 17 00:00:00 2001
From: Junjie Bai
Date: Fri, 7 Dec 2018 00:07:05 -0800
Subject: [PATCH] Make autograd engine compatible with hip

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14873

Differential Revision: D13375053

Pulled By: bddppq

fbshipit-source-id: f3051640386667bbf0566856ed433eb83276c39e
---
 tools/amd_build/build_amd.py   | 12 ++++++++++--
 torch/csrc/autograd/engine.cpp | 32 ++++++++++++++++++++++++++------
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py
index 227ca25..eeda8d8 100644
--- a/tools/amd_build/build_amd.py
+++ b/tools/amd_build/build_amd.py
@@ -52,7 +52,9 @@ ignores = [
     "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
     "caffe2/operators/pool_op_cudnn.cu",
     '**/hip/**',
+    # These files are compatible with both cuda and hip
     "aten/src/ATen/core/*",
+    "torch/csrc/autograd/engine.cpp"
 ]
 
 json_settings = os.path.join(amd_build_dir, "disabled_features.json")
@@ -64,8 +66,14 @@ if not args.out_of_place_only:
         subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
 
     # Make various replacements inside AMD_BUILD/torch directory
-    ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
-                    "csrc/cuda/cuda_check.h"]
+    ignore_files = [
+        # These files use nvrtc; hip doesn't have an equivalent
+        "csrc/autograd/profiler.h",
+        "csrc/autograd/profiler.cpp",
+        "csrc/cuda/cuda_check.h",
+        # These files are compatible with both cuda and hip
+        "csrc/autograd/engine.cpp"
+    ]
     for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
         for filename in files:
             if filename.endswith(".cpp") or filename.endswith(".h"):
diff --git a/torch/csrc/autograd/engine.cpp b/torch/csrc/autograd/engine.cpp
index 01e24be..93317d7 100644
--- a/torch/csrc/autograd/engine.cpp
+++ b/torch/csrc/autograd/engine.cpp
@@ -27,11 +27,16 @@
 #include <sstream>
 #include <TH/TH.h>
 
+#if defined(USE_CUDA) || defined(USE_ROCM)
 #ifdef USE_CUDA
 #include <cuda.h>
+#endif // USE_CUDA
+#ifdef USE_ROCM
+#include <hip/hip_runtime.h>
+#endif // USE_ROCM
 #include <THC/THC.h>
 #include <ATen/cuda/CUDAGuard.h>
-#endif
+#endif // defined(USE_CUDA) || defined(USE_ROCM)
 
 namespace torch { namespace autograd {
 
@@ -206,7 +211,7 @@ Engine::~Engine() = default;
 // not CUDA.
 auto Engine::thread_init(int device) -> void {
   THInferNumThreads();
-#ifdef USE_CUDA
+#if defined(USE_CUDA) || defined(USE_ROCM)
   // NB: We MUST NOT construct the guard for device -1,
   // as in some settings we compile with USE_CUDA, but
   // have lazy stubs for CUDA functionality (so actually
@@ -630,10 +635,25 @@ auto Engine::ready_queue(int device) -> ReadyQueue& {
 auto Engine::start_threads() -> void {
   int num_devices = 0;
 #ifdef USE_CUDA
-  // check for case of compiled with CUDA but no available devices
-  if (cudaGetDeviceCount(&num_devices) != cudaSuccess) {
-    cudaGetLastError();
-    num_devices = 0;
+  {
+    int num_cuda_devices = 0;
+    // check for case of compiled with CUDA but no available devices
+    if (cudaGetDeviceCount(&num_cuda_devices) != cudaSuccess) {
+      cudaGetLastError();
+    } else {
+      num_devices += num_cuda_devices;
+    }
+  }
+#endif
+#ifdef USE_ROCM
+  {
+    int num_hip_devices = 0;
+    // check for case of compiled with ROCm but no available devices
+    if (hipGetDeviceCount(&num_hip_devices) != hipSuccess) {
+      hipGetLastError();
+    } else {
+      num_devices += num_hip_devices;
+    }
   }
 #endif
   // One for CPU, plus one for every GPU device
-- 
2.7.4
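
Note on the start_threads() change above: each backend now probes its device
count in its own scope and contributes to a shared total, so a binary built
with CUDA and/or ROCm support still starts the right number of worker threads
when one (or both) runtimes can see no devices. A minimal standalone sketch of
that pattern follows; the count_gpus() helper is hypothetical and not part of
the patch:

#ifdef USE_CUDA
#include <cuda_runtime.h>     // cudaGetDeviceCount, cudaGetLastError
#endif
#ifdef USE_ROCM
#include <hip/hip_runtime.h>  // hipGetDeviceCount, hipGetLastError
#endif

// Hypothetical helper illustrating the accumulation pattern used in
// Engine::start_threads(): a failed probe clears the runtime's sticky
// error state and contributes zero devices instead of aborting.
static int count_gpus() {
  int num_devices = 0;
#ifdef USE_CUDA
  {
    int n = 0;
    if (cudaGetDeviceCount(&n) != cudaSuccess) {
      cudaGetLastError();  // compiled with CUDA but no usable devices
    } else {
      num_devices += n;
    }
  }
#endif
#ifdef USE_ROCM
  {
    int n = 0;
    if (hipGetDeviceCount(&n) != hipSuccess) {
      hipGetLastError();   // compiled with ROCm but no usable devices
    } else {
      num_devices += n;
    }
  }
#endif
  return num_devices;
}

Keeping the two probes in separate scopes also means a CUDA failure cannot
leak a stale count into the HIP branch, which is why the patch replaces the
single shared counter with per-backend locals.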