Fix dense Embedding to work with double backward (#9078)
author kshitij12345 <kshitijkalambarkar@gmail.com>
Wed, 3 Apr 2019 16:16:29 +0000 (09:16 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 3 Apr 2019 16:50:34 +0000 (09:50 -0700)
Summary:
Fixes: #6469

1. `ATen/native/native_functions.yaml` had [dispatch](https://github.com/pytorch/pytorch/blob/03e7953a98875c0164cb8e2c19b45800e85f4347/aten/src/ATen/native/native_functions.yaml#L451-L455) variants for `embedding_dense_backward`; however, `embedding_backward` made an explicit [call](https://github.com/pytorch/pytorch/blob/03e7953a98875c0164cb8e2c19b45800e85f4347/aten/src/ATen/native/Embedding.cpp#L35-L45) to it, which led to an error.

2. For a CUDA tensor, the function used to crash when dereferencing the indices' data [pointer](https://github.com/pytorch/pytorch/blob/03e7953a98875c0164cb8e2c19b45800e85f4347/aten/src/ATen/native/Embedding.cpp#L93), since that data lives in device memory.

Both issues have been fixed and verified with the following scripts (on both CPU and CUDA):

1. The script mentioned in the issue:
```
import torch

class Test(torch.nn.Module):

    def __init__(self):
        super(Test, self).__init__()
        self.embd = torch.nn.Embedding(1000, 100)
        self.dense = torch.nn.Linear(100, 1)

    def forward(self, inp):
        inp = self.embd(inp)
        return self.dense(inp)

test = Test()
inp = torch.tensor([0, 1, 2, 1, 1])
out = test(inp)
raw_loss = out.mean(dim=0)

# take first-order grads with create_graph=True so that the gradient
# norm below is itself differentiable (requires double backward)
loss_grad = torch.autograd.grad(outputs=raw_loss,
                                inputs=list(test.parameters()),
                                retain_graph=True, create_graph=True,
                                only_inputs=True)
norm = sum(param.norm() ** 2 for param in loss_grad)
loss = raw_loss + norm

loss.backward(retain_graph=True)

print(test.embd.weight.grad)

```
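The same behavior can also be checked numerically. A minimal standalone sketch along the lines of the `_test_embedding_dense_grad` test added in `test_nn.py` below (double precision assumed, since `gradcheck`'s finite-difference tolerances are too tight for float32):
```
import torch
import torch.nn.functional as F

# numerical first- and second-order gradient checks for dense embedding,
# mirroring the _test_embedding_dense_grad test added below
weight = torch.randn(20, 20, dtype=torch.double, requires_grad=True)
indices = torch.tensor([[0, 1, 1, 2], [3, 5, 7, 11]])

def fn(w):
    return F.embedding(indices, w)

assert torch.autograd.gradcheck(fn, (weight,))
assert torch.autograd.gradgradcheck(fn, (weight,))
```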

2. Test script:
```
import torch
import time
start = time.time()
input = torch.tensor([[1, 0], [1, 0]], device='cpu')
embedding_matrix = torch.tensor([[1.0, 3.0], [2.0, 4.0]],
                                requires_grad=True, device='cpu')

# square the weights so that the embedding's input itself carries a graph
sq = embedding_matrix * embedding_matrix
emb = torch.nn.functional.embedding(input, sq, scale_grad_by_freq=False)

print('Embedding Matrix')
print(embedding_matrix)
print('-----------------')

sum_ = emb.sum()

loss_grad, = torch.autograd.grad(outputs=sum_, inputs=embedding_matrix, create_graph=True)

print('Gradient')
print(loss_grad)
print('-----------------')

sum2_ = sum_ + loss_grad.sum()
print(sum2_)
sum2_.backward()

print(embedding_matrix.grad)
print(time.time() - start)
```
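Switching `device='cpu'` to `device='cuda'` in the script above exercises the second fix. For reference, the closed form used by `embedding_dense_double_backward` (see the `Functions.cpp` hunk below) can be sanity-checked in Python. This is a minimal illustrative sketch, not part of the patch: the dense backward scatter-adds `grad_output` into a weight-shaped buffer, and since that map is linear in `grad_output`, differentiating it again simply gathers the incoming gradient back out along `indices`:
```
import torch

indices = torch.tensor([[1, 0], [1, 0]])
grad_output = torch.randn(2, 2, 3, dtype=torch.double, requires_grad=True)
num_weights, dim = 2, 3

# first backward: scatter-add grad_output rows into a weight-shaped buffer
d_weight = torch.zeros(num_weights, dim, dtype=torch.double).index_add(
    0, indices.reshape(-1), grad_output.reshape(-1, dim))

# gradient arriving at d_weight during the double backward
gg_weight = torch.randn(num_weights, dim, dtype=torch.double)

# autograd's answer vs. the index_select + view closed form from the fix
auto, = torch.autograd.grad(d_weight, grad_output, grad_outputs=gg_weight)
closed = gg_weight.index_select(0, indices.reshape(-1)).view(2, 2, dim)
print(torch.allclose(auto, closed))  # True
```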
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9078

Reviewed By: ezyang

Differential Revision: D14691901

Pulled By: soumith

fbshipit-source-id: 78e2612ba39080be564c876311671eb5a0119a0f

aten/src/ATen/native/native_functions.yaml
test/test_nn.py
tools/autograd/derivatives.yaml
tools/autograd/templates/Functions.cpp

aten/src/ATen/native/native_functions.yaml
index a25bb58..e1cd053 100644 (file)
 - func: embedding_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
   matches_jit_signature: True
 
-- func: embedding_dense_backward(Tensor grad, IndexTensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
+- func: embedding_dense_backward(Tensor grad_output, IndexTensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
   dispatch:
     CPU: embedding_dense_backward_cpu
     CUDA: embedding_dense_backward_cuda
test/test_nn.py
index 25fb4fd..5ba8e62 100644 (file)
@@ -2049,6 +2049,27 @@ class TestNN(NNTestCase):
         self.assertTrue(embedding.weight.grad.is_sparse)
         self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
 
+    def _test_embedding_dense_grad(self, dev):
+        embd = nn.Embedding(20, 20).to(dev)
+        weight = embd.weight
+
+        def fn_wrapper(dev):
+            def fn(weight):
+                inp = torch.tensor([[0, 1, 1, 2], [3, 5, 7, 11]], dtype=torch.long).to(dev)
+                return torch.nn.functional.embedding(inp, weight)
+            return fn
+
+        fn = fn_wrapper(dev)
+        _assertGradAndGradgradChecks(self, fn, (weight, ))
+
+    def test_embedding_dense_grad(self):
+        self._test_embedding_dense_grad("cpu")
+
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @skipIfRocm
+    def test_embedding_dense_grad_cuda(self):
+        self._test_embedding_dense_grad("cuda")
+
     def test_embedding_sparse_backward(self):
         embedding = nn.Embedding(10, 3, sparse=True)
         embedding.zero_grad()
@@ -2111,6 +2132,15 @@ class TestNN(NNTestCase):
                 embedding.zero_grad()
                 self.assertEqual(after, pre)
 
+                # test double backward
+                emb_sum = embedding(indices).sum()
+                emb_grad = torch.autograd.grad(outputs=emb_sum, inputs=list(embedding.parameters()), retain_graph=True)
+                scalar = emb_grad[0].sum() + emb_sum
+                scalar.backward()
+                after = (embedding.weight + embedding.weight.grad)[padding_idx]
+                embedding.zero_grad()
+                self.assertEqual(after, pre)
+
     def test_embedding_max_norm(self):
         embedding = nn.Embedding(22, 5, max_norm=1.0)
         input = Variable(torch.LongTensor([2, 8, 8, 6]))
tools/autograd/derivatives.yaml
index bea70b5..8e9fd2d 100644 (file)
   indices: not_differentiable
   weight: embedding_backward(grad, indices, weight.size(0), padding_idx, scale_grad_by_freq, sparse)
 
+- name: embedding_dense_backward(Tensor grad_output, Tensor indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq)
+  grad_output: embedding_dense_double_backward(grad, indices)
+
 - name: _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int64_t mode, bool sparse)
   indices: not_differentiable
   offsets: not_differentiable
tools/autograd/templates/Functions.cpp
index c927624..4736ca2 100644 (file)
@@ -2083,6 +2083,19 @@ Tensor constant_pad_nd_backward(const Tensor& grad, IntArrayRef pad) {
   return at::constant_pad_nd(grad, negated_pad, 0);
 }
 
+Tensor embedding_dense_double_backward(const Tensor & grad, const Tensor & indices) {
+  // since first backward takes care of padding_idx
+  // and scaling by frequency, we don't need to worry
+  // about it here.
+  auto gg_weight = grad.index_select(0, indices.reshape(-1));
+
+  // reshape gradient as per the shape of indices
+  auto size = indices.sizes().vec();
+  size.push_back(-1);
+
+  return gg_weight.view(size);
+}
+
 } // anonymous namespace
 
 ${autograd_function_definitions}