From 6651fae827fb6becd177fadb35c1913ca74950de Mon Sep 17 00:00:00 2001
From: Junjie Bai
Date: Fri, 7 Dec 2018 00:07:05 -0800
Subject: [PATCH] Make autograd engine compatible with hip

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14873

Differential Revision: D13375053

Pulled By: bddppq

fbshipit-source-id: f3051640386667bbf0566856ed433eb83276c39e
---
 tools/amd_build/build_amd.py   | 12 ++++++++++--
 torch/csrc/autograd/engine.cpp | 32 ++++++++++++++++++++++++++------
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py
index 227ca25..eeda8d8 100644
--- a/tools/amd_build/build_amd.py
+++ b/tools/amd_build/build_amd.py
@@ -52,7 +52,9 @@ ignores = [
     "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
     "caffe2/operators/pool_op_cudnn.cu",
     '**/hip/**',
+    # These files are compatible with both cuda and hip
     "aten/src/ATen/core/*",
+    "torch/csrc/autograd/engine.cpp"
 ]
 
 json_settings = os.path.join(amd_build_dir, "disabled_features.json")
@@ -64,8 +66,14 @@ if not args.out_of_place_only:
         subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
 
     # Make various replacements inside AMD_BUILD/torch directory
-    ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
-                    "csrc/cuda/cuda_check.h"]
+    ignore_files = [
+        # These files use nvrtc; hip doesn't have an equivalent
+        "csrc/autograd/profiler.h",
+        "csrc/autograd/profiler.cpp",
+        "csrc/cuda/cuda_check.h",
+        # These files are compatible with both cuda and hip
+        "csrc/autograd/engine.cpp"
+    ]
     for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
         for filename in files:
             if filename.endswith(".cpp") or filename.endswith(".h"):
diff --git a/torch/csrc/autograd/engine.cpp b/torch/csrc/autograd/engine.cpp
index 01e24be..93317d7 100644
--- a/torch/csrc/autograd/engine.cpp
+++ b/torch/csrc/autograd/engine.cpp
@@ -27,11 +27,16 @@
 #include <sstream>
 #include <TH/TH.h>
 
+#if defined(USE_CUDA) || defined(USE_ROCM)
 #ifdef USE_CUDA
 #include <cuda.h>
+#endif // USE_CUDA
+#ifdef USE_ROCM
+#include <hip/hip_runtime.h>
+#endif // USE_ROCM
 #include <THC/THC.h>
 #include <ATen/cuda/CUDAGuard.h>
-#endif
+#endif // defined(USE_CUDA) || defined(USE_ROCM)
 
 namespace torch { namespace autograd {
 
@@ -206,7 +211,7 @@ Engine::~Engine() = default;
 // not CUDA.
 auto Engine::thread_init(int device) -> void {
   THInferNumThreads();
-#ifdef USE_CUDA
+#if defined(USE_CUDA) || defined(USE_ROCM)
   // NB: We MUST NOT construct the guard for device -1,
   // as in some settings we compile with USE_CUDA, but
   // have lazy stubs for CUDA functionality (so actually
@@ -630,10 +635,25 @@ auto Engine::ready_queue(int device) -> ReadyQueue& {
 auto Engine::start_threads() -> void {
   int num_devices = 0;
 #ifdef USE_CUDA
-  // check for case of compiled with CUDA but no available devices
-  if (cudaGetDeviceCount(&num_devices) != cudaSuccess) {
-    cudaGetLastError();
-    num_devices = 0;
+  {
+    int num_cuda_devices = 0;
+    // check for case of compiled with CUDA but no available devices
+    if (cudaGetDeviceCount(&num_cuda_devices) != cudaSuccess) {
+      cudaGetLastError();
+    } else {
+      num_devices += num_cuda_devices;
+    }
+  }
+#endif
+#ifdef USE_ROCM
+  {
+    int num_hip_devices = 0;
+    // check for case of compiled with ROCm but no available devices
+    if (hipGetDeviceCount(&num_hip_devices) != hipSuccess) {
+      hipGetLastError();
+    } else {
+      num_devices += num_hip_devices;
+    }
   }
 #endif
   // One for CPU, plus one for every GPU device
-- 
2.7.4
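
Note on the start_threads() change above: each backend now probes its device
count in its own scope and contributes to a shared total, so a binary built
with CUDA and/or ROCm support still starts the right number of worker threads
when one (or both) runtimes can see no devices. A minimal standalone sketch of
that pattern follows; the count_gpus() helper is hypothetical and not part of
the patch:

#ifdef USE_CUDA
#include <cuda_runtime.h>     // cudaGetDeviceCount, cudaGetLastError
#endif
#ifdef USE_ROCM
#include <hip/hip_runtime.h>  // hipGetDeviceCount, hipGetLastError
#endif

// Hypothetical helper illustrating the accumulation pattern used in
// Engine::start_threads(): a failed probe clears the runtime's sticky
// error state and contributes zero devices instead of aborting.
static int count_gpus() {
  int num_devices = 0;
#ifdef USE_CUDA
  {
    int n = 0;
    if (cudaGetDeviceCount(&n) != cudaSuccess) {
      cudaGetLastError();  // compiled with CUDA but no usable devices
    } else {
      num_devices += n;
    }
  }
#endif
#ifdef USE_ROCM
  {
    int n = 0;
    if (hipGetDeviceCount(&n) != hipSuccess) {
      hipGetLastError();   // compiled with ROCm but no usable devices
    } else {
      num_devices += n;
    }
  }
#endif
  return num_devices;
}

Keeping the two probes in separate scopes also means a CUDA failure cannot
leak a stale count into the HIP branch, which is why the patch replaces the
single shared counter with per-backend locals.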