From f0f5cffde9196dd5ef46a6d6ba17975ea995c3ca Mon Sep 17 00:00:00 2001
From: Rohan Varma <rvarm1@fb.com>
Date: Wed, 18 Aug 2021 11:38:11 -0700
Subject: [PATCH] [DDP] Add a debug check in cpp fp16 compress (#63379)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63379

This codepath has been prone to bugs, as seen in the below diff. The added
assertion is a basic sanity check that will help guard against regressions from
changes/refactors that touch it. It is enabled in debug-only builds so as not
to affect performance.
ghstack-source-id: 136056093

Test Plan: CI

Reviewed By: SciPioneer

Differential Revision: D30358440

fbshipit-source-id: e1b3893a223722c2593ceed8696a09c7d07d47c1
---
 torch/csrc/distributed/c10d/default_comm_hooks.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/torch/csrc/distributed/c10d/default_comm_hooks.cpp b/torch/csrc/distributed/c10d/default_comm_hooks.cpp
index 91700baa2e..30bc96b16f 100644
--- a/torch/csrc/distributed/c10d/default_comm_hooks.cpp
+++ b/torch/csrc/distributed/c10d/default_comm_hooks.cpp
@@ -1,4 +1,6 @@
 #include <c10d/default_comm_hooks.hpp>
+#include <c10/core/ScalarType.h>
+#include <c10/util/Exception.h>
 
 #include <c10d/ProcessGroup.hpp>
 #include <c10d/comm.hpp>
@@ -31,6 +33,11 @@ c10::intrusive_ptr<c10::ivalue::Future> FP16CompressCommHook::runHook(
         "ProcessGroup::allreduce should return TensorList");
 
     auto reduce_tensor = result.toTensorVector()[0];
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+      reduce_tensor.scalar_type() == at::ScalarType::Half,
+      "Expected reduced tensor to be fp16 in FP16CompressHook, but got type ",
+      reduce_tensor.scalar_type()
+    );
     decompressed_tensor.copy_(reduce_tensor);
     return c10::IValue(decompressed_tensor);
   };
-- 
2.34.1