add BFloat16 support for fold and unfold on CPU (#62880)
author CaoE <e.cao@intel.com>
Tue, 31 Aug 2021 02:12:23 +0000 (19:12 -0700)
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Tue, 31 Aug 2021 02:14:10 +0000 (19:14 -0700)
Summary:
Add BFloat16 support for the fold (col2im) and unfold (im2col) operators on CPU.
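
A rough usage sketch of what this enables (shapes chosen for illustration, mirroring the kernel_size used in the new test; not taken verbatim from the PR):

    import torch
    import torch.nn.functional as F

    # unfold (im2col) on a bfloat16 CPU tensor
    inp = torch.randn(1, 3, 4, 5, dtype=torch.bfloat16)
    cols = F.unfold(inp, kernel_size=(2, 2))                    # (1, 12, 12), bfloat16

    # fold (col2im) reassembles the patches, also in bfloat16
    out = F.fold(cols, output_size=(4, 5), kernel_size=(2, 2))  # (1, 3, 4, 5), bfloat16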

Pull Request resolved: https://github.com/pytorch/pytorch/pull/62880

Reviewed By: iramazanli

Differential Revision: D30576387

Pulled By: zou3519

fbshipit-source-id: c48f6e56702bfea34448db1b3a1634c49c5d8ec8

aten/src/ATen/native/Col2Im.cpp
aten/src/ATen/native/Im2Col.cpp
test/test_nn.py
torch/testing/_internal/common_methods_invocations.py

diff --git a/aten/src/ATen/native/Col2Im.cpp b/aten/src/ATen/native/Col2Im.cpp
index e1cc31d..7e11b1b 100644
@@ -136,7 +136,7 @@ static void col2im_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_height, output_width});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf,
       input.scalar_type(), "col2im_out_cpu", [&] {
         Tensor input_n = Tensor();
         Tensor output_n = Tensor();
diff --git a/aten/src/ATen/native/Im2Col.cpp b/aten/src/ATen/native/Im2Col.cpp
index 0970095..586b961 100644
@@ -86,7 +86,7 @@ static void im2col_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_length});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf,
       input.scalar_type(), "im2col_out_cpu", [&] {
         Tensor input_n;
         Tensor output_n;
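
The enabling change in both CPU kernels is the dispatch macro: moving from AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf, ...) to AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf, ...) instantiates the per-type lambda for BFloat16 in addition to the types already covered. Previously a bfloat16 CPU input failed at dispatch time; a hedged sketch of the before/after behavior (error text approximate, not copied from a log):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 12, 12, dtype=torch.bfloat16)  # CPU tensor
    # Before this PR the CPU kernel had no BFloat16 instantiation, roughly:
    #   F.fold(x, output_size=(4, 5), kernel_size=(2, 2))
    #   RuntimeError: "col2im_out_cpu" not implemented for 'BFloat16'
    # After this PR the same call succeeds and returns a bfloat16 tensor.
    y = F.fold(x, output_size=(4, 5), kernel_size=(2, 2))
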
diff --git a/test/test_nn.py b/test/test_nn.py
index c6d0e78..96321ba 100644
@@ -17438,14 +17438,30 @@ class TestNNDeviceType(NNTestCase):
             m(input)
 
     def test_fold(self, device):
+        def test_dtype(fn, input, dtype):
+            input = input.detach().clone().to(dtype=dtype).requires_grad_(True)
+            input2 = input.detach().clone().float().requires_grad_(True)
+            out = fn(input)
+            out.sum().backward()
+            out2 = fn(input2)
+            out2.sum().backward()
+            self.assertEqual(out.dtype, dtype)
+            self.assertEqual(input.grad.dtype, dtype)
+            self.assertEqual(out, out2.to(dtype=dtype), atol=0.05, rtol=0)
+            self.assertEqual(input.grad, input2.grad.to(dtype=dtype))
+
         def func(x):
             return F.fold(x, output_size=(4, 5), kernel_size=(2, 2))
+
         seeds = (44, 83, 71, 25, 999)
         for sd in seeds:
             torch.manual_seed(sd)
             x = torch.randn(1, 12, 12, device=device, requires_grad=True)
             gradcheck(func, [x])
             gradgradcheck(func, [x])
+            if device == 'cpu':
+                test_dtype(func, x, torch.bfloat16)
+
 
     def test_logsigmoid_out(self, device):
         # this isn't actually documented, but was broken previously:
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 04db52b..e7d9380 100644
@@ -7267,6 +7267,7 @@ op_db: List[OpInfo] = [
     OpInfo('nn.functional.unfold',
            aten_name='im2col',
            dtypes=floating_types_and(torch.half),
+           dtypesIfCPU=floating_types_and(torch.half, torch.bfloat16),
            sample_inputs_func=sample_inputs_nn_unfold,
            skips=(
                # JIT alias info internal asserts here