From d895d30876015862ba64e40fde610274c281f507 Mon Sep 17 00:00:00 2001
From: Gregory Chanan
Date: Fri, 29 Mar 2019 13:31:42 -0700
Subject: [PATCH] Fix c10d build without nccl.

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18582

Differential Revision: D14672928

Pulled By: gchanan

fbshipit-source-id: 74e9805cbaf5ebe8e3f579fe08dad72eb410b80a
---
 torch/csrc/distributed/c10d/ddp.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/torch/csrc/distributed/c10d/ddp.cpp b/torch/csrc/distributed/c10d/ddp.cpp
index 7673431..c60d077 100644
--- a/torch/csrc/distributed/c10d/ddp.cpp
+++ b/torch/csrc/distributed/c10d/ddp.cpp
@@ -3,7 +3,9 @@
 #include <torch/csrc/cuda/comm.h>
 #include <torch/csrc/utils/tensor_flatten.h>
 
+#ifdef USE_C10D_NCCL
 #include <torch/csrc/cuda/nccl.h>
+#endif
 
 #include <c10d/ProcessGroup.hpp>
 
@@ -129,6 +131,11 @@ std::tuple<std::vector<std::shared_ptr<ProcessGroup::Work>>, at::Tensor> queueReduction(
     ProcessGroup& processGroup,
     std::vector<std::vector<at::Tensor>>& gradsBatch,
     const std::vector<int64_t>& devices) {
+#ifndef USE_C10D_NCCL
+  if (devices.size() > 1) {
+    AT_ERROR("queueReduction with more than 1 device not supported without NCCL");
+  }
+#endif
   AT_ASSERT(!gradsBatch.empty());
   AT_ASSERT(!devices.empty());
 
@@ -170,7 +177,11 @@ std::tuple<std::vector<std::shared_ptr<ProcessGroup::Work>>, at::Tensor> queueReduction(
   }
 
   if (devices.size() > 1) {
+#ifdef USE_C10D_NCCL
     torch::cuda::nccl::reduce(gradsBatchCoalesced, 0);
+#else
+    AT_ERROR("shouldn't have gotten here -- queueReduction not supported without NCCL");
+#endif
   }
 
   gradsBatchCoalesced[0] /= processGroup.getSize();
-- 
2.7.4
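
For readers who want to see the guard pattern in isolation, here is a minimal, self-contained C++ sketch of what the patch does (the file and function names below are hypothetical illustrations, not PyTorch source): compile the NCCL-dependent call only when USE_C10D_NCCL is defined, and turn the multi-device path into a loud runtime error otherwise. Build with or without -DUSE_C10D_NCCL to exercise both branches.

// nccl_guard_sketch.cpp -- hypothetical standalone sketch of the patch's
// conditional-compilation pattern; not part of the PyTorch sources.
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Stand-in for queueReduction's multi-device branch.
void reduceGrads(const std::vector<int64_t>& devices) {
  if (devices.size() > 1) {
#ifdef USE_C10D_NCCL
    // NCCL-enabled build: the patched code calls
    // torch::cuda::nccl::reduce(gradsBatchCoalesced, 0) here.
    std::cout << "reducing across " << devices.size() << " devices via NCCL\n";
    return;
#else
    // NCCL-less build: reject the multi-device path up front rather
    // than failing to link or silently computing the wrong result.
    throw std::runtime_error(
        "reduction across more than 1 device is not supported without NCCL");
#endif
  }
  std::cout << "single-device reduction, no NCCL required\n";
}

int main() {
  reduceGrads({0});      // works in either build
  try {
    reduceGrads({0, 1}); // throws unless built with -DUSE_C10D_NCCL
  } catch (const std::exception& e) {
    std::cerr << e.what() << '\n';
  }
  return 0;
}

The design choice mirrored from the patch: single-device reduction keeps working in builds without NCCL; only the genuinely NCCL-dependent multi-device path is disabled, with a runtime error rather than a compile failure for callers who hit it.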