Summary:
This PR allows `gather` to optionally return sparse gradients, as requested in #16329. It also allows the autograd engine to accumulate sparse gradients in place when it is safe to do so.
I've commented out the `size.size()` check in `SparseTensor.cpp` that also caused #17152; the check does not seem to serve a useful purpose, but please correct me if I'm wrong and a better fix is required.
Motivating example:
For this commonly used label smoothing loss function
```
def label_smoothing_opt(x, target):
    padding_idx = 0
    smoothing = 0.1
    logprobs = torch.nn.functional.log_softmax(x, dim=-1, dtype=torch.float32)
    pad_mask = (target == padding_idx)
    ll_loss = logprobs.gather(dim=-1, index=target.unsqueeze(1), sparse_grad=True).squeeze(1)
    smooth_loss = logprobs.mean(dim=-1)
    loss = (smoothing - 1.0) * ll_loss - smoothing * smooth_loss
    loss.masked_fill_(pad_mask, 0)
    return loss.sum()
```
the backward pass goes from 12.6 ms with dense gather gradients to 7.3 ms with sparse gradients, for 9K tokens x 30K vocab. That is a single-digit percent end-to-end improvement, and it also reduces peak memory usage.
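For context, here is a rough sketch of how the quoted backward timing could be reproduced (the 9216 x 30000 shape is an assumption standing in for "9K tokens x 30K vocab", and a CUDA device is assumed):
```
import time
import torch

x = torch.randn(9216, 30000, device='cuda', requires_grad=True)
target = torch.randint(0, 30000, (9216,), device='cuda')

loss = label_smoothing_opt(x, target)  # the function defined above
torch.cuda.synchronize()
t0 = time.perf_counter()
loss.backward()
torch.cuda.synchronize()
print('backward: %.1f ms' % ((time.perf_counter() - t0) * 1e3))
```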
Shout-out to core devs: adding python-exposed functions with keyword arguments through native_functions.yaml is very easy now!
cc gchanan apaszke
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17182
Differential Revision: D14158431
Pulled By: gchanan
fbshipit-source-id: c8b654611534198025daaf7a634482b3151fbade
Tensor index_select(int64_t dim, const Tensor & index) const;
Tensor masked_select(const Tensor & mask) const;
Tensor nonzero() const;
- Tensor gather(int64_t dim, const Tensor & index) const;
+ Tensor gather(int64_t dim, const Tensor & index, bool sparse_grad=false) const;
Tensor addcmul(const Tensor & tensor1, const Tensor & tensor2, Scalar value=1) const;
Tensor addcdiv(const Tensor & tensor1, const Tensor & tensor2, Scalar value=1) const;
std::tuple<Tensor,Tensor> gels(const Tensor & A) const;
inline Tensor Tensor::nonzero() const {
return type().nonzero(*this);
}
-inline Tensor Tensor::gather(int64_t dim, const Tensor & index) const {
- return type().gather(*this, dim, index);
+inline Tensor Tensor::gather(int64_t dim, const Tensor & index, bool sparse_grad) const {
+ return type().gather(*this, dim, index, sparse_grad);
}
inline Tensor Tensor::addcmul(const Tensor & tensor1, const Tensor & tensor2, Scalar value) const {
return type().addcmul(*this, tensor1, tensor2, value);
virtual Tensor index_select(const Tensor & self, int64_t dim, const Tensor & index) const = 0;
virtual Tensor masked_select(const Tensor & self, const Tensor & mask) const = 0;
virtual Tensor nonzero(const Tensor & self) const = 0;
- virtual Tensor gather(const Tensor & self, int64_t dim, const Tensor & index) const = 0;
+ virtual Tensor gather(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad) const = 0;
virtual Tensor addcmul(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, Scalar value) const = 0;
virtual Tensor addcdiv(const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, Scalar value) const = 0;
virtual std::tuple<Tensor,Tensor> gels(const Tensor & self, const Tensor & A) const = 0;
return _self.clone().masked_fill_(mask, source);
}
+Tensor _gather_sparse_backward(const Tensor& self, int64_t dim, const Tensor& index, const Tensor& grad){
+  // special-case scalar input and/or index
+  if (self.ndimension() == 0) return at::_sparse_coo_tensor_unsafe(at::empty({0, grad.numel()}, index.options()), grad, self.sizes());
+  if (grad.ndimension() == 0) return at::_sparse_coo_tensor_unsafe(index.view({1, 1}), grad, self.sizes());
+  Tensor sparse_ind = at::empty({self.ndimension(), grad.numel()}, self.options().dtype(at::kLong));
+  int64_t n_above = grad.numel();
+  int64_t n_below = 1;
+  if (dim < 0) dim += self.ndimension();
+  for (int i = 0; i < self.ndimension(); i++) {
+    n_above /= grad.size(i);
+    if (i == dim) {
+      // along the gathered dimension, the coordinate is the gather index itself
+      sparse_ind[i] = index.reshape(-1);
+    } else {
+      // other dimensions follow the row-major coordinate pattern of grad:
+      // each value repeated n_above times, the whole cycle repeated n_below times
+      sparse_ind[i] = at::arange(grad.size(i), self.options().dtype(at::kLong)).unsqueeze(1).expand({grad.size(i), n_above}).reshape(-1).repeat(n_below);
+    }
+    n_below *= grad.size(i);
+  }
+  return at::_sparse_coo_tensor_unsafe(sparse_ind, grad.reshape(-1), self.sizes());
+}
+
}} // at::native
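For readers who prefer Python, here is a rough equivalent of the index construction above (a hypothetical reference helper, with the scalar special cases omitted and the public `torch.sparse_coo_tensor` standing in for the unsafe internal constructor):
```
import torch

def gather_sparse_backward_ref(self, dim, index, grad):
    # one row of COO indices per dimension of `self`
    dim = dim % self.dim()
    sparse_ind = torch.empty(self.dim(), grad.numel(), dtype=torch.long)
    n_above = grad.numel()
    n_below = 1
    for i in range(self.dim()):
        n_above //= grad.size(i)
        if i == dim:
            # along the gathered dimension, the coordinate is the gather index
            sparse_ind[i] = index.reshape(-1)
        else:
            # row-major coordinate pattern of grad: each value repeated
            # n_above times, the whole cycle repeated n_below times
            sparse_ind[i] = (torch.arange(grad.size(i))
                             .unsqueeze(1).expand(grad.size(i), n_above)
                             .reshape(-1).repeat(n_below))
        n_below *= grad.size(i)
    return torch.sparse_coo_tensor(sparse_ind, grad.reshape(-1), self.shape)
```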
return at::legacy::th::_th_nonzero(self);
}
-Tensor & gather_out(Tensor & result, const Tensor & self, int64_t dim, const Tensor & index) {
+Tensor & gather_out(Tensor & result, const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad) {
+  // sparse_grad only affects the autograd formula; the forward computation is unchanged
  return at::legacy::th::_th_gather_out(result, self, dim, index);
}
-Tensor gather(const Tensor & self, int64_t dim, const Tensor & index) {
+Tensor gather(const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad) {
  return at::legacy::th::_th_gather(self, dim, index);
}
matches_jit_signature: True
variants: method, function
-- func: gather(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
+- func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
matches_jit_signature: True
-- func: gather(Tensor self, int dim, Tensor index) -> Tensor
+- func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
matches_jit_signature: True
variants: method, function
+- func: _gather_sparse_backward(Tensor self, int dim, Tensor index, Tensor grad) -> Tensor
+
- func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
matches_jit_signature: True
gradcheck(ctc_after_softmax, [x])
+    def _test_sparse_gather(self, size_x, size_ind, dim):
+        x = torch.randn(size_x, requires_grad=True)
+        if len(size_ind) > 0 and len(size_x) > 0:
+            ind = torch.randint(x.size(dim), size_ind)
+        else:
+            ind = torch.zeros(size_ind, dtype=torch.int64)
+        out = torch.gather(x, dim, ind, sparse_grad=False)
+        grad = torch.rand_like(out)
+        out.backward(grad)
+        grad_dense = x.grad.clone()
+        x.grad = None
+        out = torch.gather(x, dim, ind, sparse_grad=True)
+        out.backward(grad)
+        self.assertEqual(grad_dense, x.grad.to_dense())
+
+    def test_sparse_gather_dim0(self):
+        self._test_sparse_gather((10, 10), (5, 10), 0)
+
+    def test_sparse_gather_dim1(self):
+        self._test_sparse_gather((10, 10, 5), (10, 5, 5), 1)
+
+    def test_sparse_gather_dim_neg(self):
+        self._test_sparse_gather((10, 10, 5), (10, 10, 2), -1)
+
+    def test_sparse_gather_ind_scalar(self):
+        self._test_sparse_gather((10,), (), 0)
+
+    def test_sparse_gather_x_scalar(self):
+        self._test_sparse_gather((), (2,), 0)
+
+    def test_sparse_gather_both_scalar(self):
+        self._test_sparse_gather((), (), 0)
+
    def test_gc_in_destructor(self):
        """
        Previously, if a Function destructor triggered a garbage collection,
- name: frac(Tensor self)
self: grad
-- name: gather(Tensor self, int64_t dim, Tensor index)
- self: at::zeros(self.sizes(), grad.options()).scatter_add_(dim, index, grad)
+- name: gather(Tensor self, int64_t dim, Tensor index, bool sparse_grad)
+ self: "sparse_grad ? at::_gather_sparse_backward(self, dim, index, grad) : at::zeros(self.sizes(), grad.options()).scatter_add_(dim, index, grad)"
- name: ge_(Tensor self, Scalar other)
self: zeros_like(self)
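As a quick sanity check of the two branches of this formula (shapes here chosen arbitrarily), the sparse backward, once densified, should match the `scatter_add_`-based dense one:
```
import torch

x = torch.randn(10, 10, requires_grad=True)
ind = torch.randint(10, (5, 10))
grad_out = torch.ones(5, 10)

torch.gather(x, 0, ind, sparse_grad=True).backward(grad_out)
dense_ref = torch.zeros_like(x).scatter_add_(0, ind, grad_out)
assert torch.equal(x.grad.to_dense(), dense_ref)
```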
return grad.sparse_mask(at::SparseTensorRef(input));
}
+
// Because the backward of pad(input, pads) is just pad(grad_output, [-p for p in pads])
Tensor constant_pad_nd_backward(const Tensor& grad, IntArrayRef pad) {
auto negated_pad = pad.vec();
add_docstr(torch.gather,
r"""
-gather(input, dim, index, out=None) -> Tensor
+gather(input, dim, index, out=None, sparse_grad=False) -> Tensor
Gathers values along an axis specified by `dim`.
dim (int): the axis along which to index
index (LongTensor): the indices of elements to gather
out (Tensor, optional): the destination tensor
+    sparse_grad (bool, optional): If ``True``, gradient w.r.t. :attr:`input` will be a sparse tensor.
Example::
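A minimal illustration of the new keyword (values here are made up for the example; the point is that the gradient comes back as a sparse COO tensor):
```
>>> x = torch.randn(3, 4, requires_grad=True)
>>> idx = torch.tensor([[0], [2], [1]])
>>> torch.gather(x, 1, idx, sparse_grad=True).sum().backward()
>>> x.grad.is_sparse
True
```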
} else {
  at::OptionalDeviceGuard device_guard(device_of(var));
  // ATen doesn't route sparse additions correctly...
+  // do dense + sparse in place if possible
  if (old_var.is_sparse()) {
-    buffer[pos] = var + old_var;
+    // storage use_count() == 1 is a big hammer, but any lighter check admits
+    // an adversarial case with an unexpected in-place modification
+    if (!var.is_sparse() && var.is_contiguous() && var.storage().use_count() == 1) {
+      buffer[pos] = var.add_(old_var);
+    } else {
+      buffer[pos] = var + old_var;
+    }
  } else {
-    buffer[pos] = old_var + var;
+    if (var.is_sparse() && !old_var.is_sparse() && old_var.is_contiguous() && old_var.storage().use_count() == 1) {
+      buffer[pos] = old_var.add_(var);
+    } else {
+      buffer[pos] = old_var + var;
+    }
  }
}
}
}
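The fast path above leans on the fact that a dense tensor accepts a sparse addend in place, which is also what makes the accumulation cheap; a minimal illustration:
```
import torch

dense = torch.zeros(4, 4)
sp = torch.sparse_coo_tensor([[0, 2], [1, 3]], [1.0, 2.0], (4, 4))
dense.add_(sp)  # accumulates the sparse values into the dense buffer in place
```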
} else if (
node->matches(
- "aten::gather(Tensor self, int dim, Tensor index) -> Tensor")) {
+ "aten::gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor")) {
auto type = input_type(0);
auto index_type = input_type(1);
// Gather has this annoying edge case where index always needs to match