"caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
"caffe2/operators/pool_op_cudnn.cu",
'**/hip/**',
+ # These files are compatible with both cuda and hip
"aten/src/ATen/core/*",
+ "torch/csrc/autograd/engine.cpp"
]
json_settings = os.path.join(amd_build_dir, "disabled_features.json")
subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir)
# Make various replacements inside AMD_BUILD/torch directory
- ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp",
- "csrc/cuda/cuda_check.h"]
+ ignore_files = [
+      # These files use nvrtc; hip doesn't have an equivalent
+ "csrc/autograd/profiler.h",
+ "csrc/autograd/profiler.cpp",
+ "csrc/cuda/cuda_check.h",
+ # These files are compatible with both cuda and hip
+ "csrc/autograd/engine.cpp"
+ ]
for root, _directories, files in os.walk(os.path.join(proj_dir, "torch")):
for filename in files:
if filename.endswith(".cpp") or filename.endswith(".h"):
#include <queue>
#include <TH/TH.h>
+#if defined(USE_CUDA) || defined(USE_ROCM)
#ifdef USE_CUDA
#include <cuda.h>
+#endif // USE_CUDA
+#ifdef USE_ROCM
+#include <hip/hip_runtime.h>
+#endif // USE_ROCM
#include <THC/THC.h>
#include <ATen/cuda/CUDAGuard.h>
-#endif
+#endif // defined(USE_CUDA) || defined(USE_ROCM)
namespace torch { namespace autograd {
// not CUDA.
auto Engine::thread_init(int device) -> void {
THInferNumThreads();
-#ifdef USE_CUDA
+#if defined(USE_CUDA) || defined(USE_ROCM)
// NB: We MUST NOT construct the guard for device -1,
// as in some settings we compile with USE_CUDA, but
// have lazy stubs for CUDA functionality (so actually
auto Engine::start_threads() -> void {
int num_devices = 0;
#ifdef USE_CUDA
- // check for case of compiled with CUDA but no available devices
- if (cudaGetDeviceCount(&num_devices) != cudaSuccess) {
- cudaGetLastError();
- num_devices = 0;
+ {
+ int num_cuda_devices = 0;
+ // check for case of compiled with CUDA but no available devices
+ if (cudaGetDeviceCount(&num_cuda_devices) != cudaSuccess) {
+ cudaGetLastError();
+ } else {
+ num_devices += num_cuda_devices;
+ }
+ }
+#endif
+#ifdef USE_ROCM
+ {
+ int num_hip_devices = 0;
+      // check for case of compiled with ROCm but no available devices
+ if (hipGetDeviceCount(&num_hip_devices) != hipSuccess) {
+ hipGetLastError();
+ } else {
+ num_devices += num_hip_devices;
+ }
}
#endif
// One for CPU, plus one for every GPU device