[DDP] Add a debug check in cpp fp16 compress (#63379)
authorRohan Varma <rvarm1@fb.com>
Wed, 18 Aug 2021 18:38:11 +0000 (11:38 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Wed, 18 Aug 2021 18:51:19 +0000 (11:51 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63379

This codepath has been prone to bugs, as seen in the diff below. This assertion
will help guard against changes/refactors that touch this code, serving as a basic
sanity check. It is enabled only in debug builds so as not to affect performance.
ghstack-source-id: 136056093

Test Plan: CI

Reviewed By: SciPioneer

Differential Revision: D30358440

fbshipit-source-id: e1b3893a223722c2593ceed8696a09c7d07d47c1

torch/csrc/distributed/c10d/default_comm_hooks.cpp

index 91700baa2e4a584754e3eb15eeb51c774a8d30c6..30bc96b16f7db9df987cb3dd3bad47fcc8e9b0aa 100644 (file)
@@ -1,4 +1,6 @@
 #include <c10d/default_comm_hooks.hpp>
+#include <c10/core/ScalarType.h>
+#include <c10/util/Exception.h>
 
 #include <c10d/ProcessGroup.hpp>
 #include <c10d/comm.hpp>
@@ -31,6 +33,11 @@ c10::intrusive_ptr<c10::ivalue::Future> FP16CompressCommHook::runHook(
         "ProcessGroup::allreduce should return TensorList");
 
     auto reduce_tensor = result.toTensorVector()[0];
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+      reduce_tensor.scalar_type() == at::ScalarType::Half,
+      "Expected reduced tensor to be fp16 in FP16CompressHook, but got type ",
+      reduce_tensor.scalar_type()
+    );
     decompressed_tensor.copy_(reduce_tensor);
     return c10::IValue(decompressed_tensor);
   };