From d35c39e73b04d7ab95812be8cbda21023e1b5cc4 Mon Sep 17 00:00:00 2001
From: Brennan Vincent
Date: Thu, 4 Apr 2019 17:18:11 -0700
Subject: [PATCH] don't attempt to multiply by a sparse matrix (#18737)

Summary:
Tested by running the script in #16562, and there was no error. Then:
```
>>> print(mat.grad)
tensor([[1., 2., 3.],
        [1., 2., 3.],
        [1., 2., 3.]])
```
which is correct.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/18737

Differential Revision: D14773078

Pulled By: umanwizard

fbshipit-source-id: 8aa36eb6f6aa104263a467d9ac91d61b3bfd05f5
---
 test/test_sparse.py                    | 11 ++++++++---
 tools/autograd/templates/Functions.cpp | 11 +++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/test/test_sparse.py b/test/test_sparse.py
index e2a5dfd..5acf650 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -863,8 +863,12 @@ class TestSparse(TestCase):
         test_shape(7, 8, 9, 20, True)
 
     def test_sparse_mm(self):
-        def test_shape(d1, d2, d3, nnz):
-            D = torch.randn(d2, d3, device=self.device).requires_grad_(True)
+        def test_shape(d1, d2, d3, nnz, transposed):
+            if transposed:
+                D = torch.randn(d3, d2,
+                                device=self.device).t_().requires_grad_(True)
+            else:
+                D = torch.randn(d2, d3, device=self.device).requires_grad_(True)
             S = self._gen_sparse(2, nnz, [d1, d2])[0]
             S_dense = S.to_dense().requires_grad_(True)
             S.requires_grad_(True)
@@ -874,7 +878,8 @@ class TestSparse(TestCase):
                 return torch.sparse.mm(S, D)
             gradcheck(fn, (S, D), check_sparse_nnz=True)
 
-        test_shape(7, 8, 9, 20)
+        test_shape(7, 8, 9, 20, False)
+        test_shape(7, 8, 9, 20, True)
 
     @skipIfRocm
     def test_dsmm(self):
diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp
index 68012b0..ba3ccbb 100644
--- a/tools/autograd/templates/Functions.cpp
+++ b/tools/autograd/templates/Functions.cpp
@@ -524,6 +524,17 @@ Tensor mm_mat1_backward(const Tensor & grad, const Tensor & mat2, const Tensor &
 Tensor mm_mat2_backward(const Tensor & grad, const Tensor & mat1, IntArrayRef sizes, IntArrayRef strides, const Scalar & alpha) {
   // if input was column-major, return grad as column-order for efficiency
   if (strides[0] == 1 && strides[1] == sizes[0]) {
+    if (mat1.is_sparse()) {
+      // Since mm(dense, sparse) doesn't exist,
+      // pass a transposed output matrix to the underlying "addmm"
+      // function directly.
+      int64_t out_rows = mat1.size(1);
+      int64_t out_cols = grad.size(1);
+      Tensor t = at::zeros({}, grad.options()).expand({out_rows, out_cols}, true);
+      Tensor r = at::empty({out_cols, out_rows}, grad.options()).t();
+      at::s_native_addmm_out(r, t, mat1.t(), grad, alpha, 1);
+      return r;
+    }
     return maybe_multiply(grad.t().mm(mat1).t(), alpha);
   } else {
     return maybe_multiply(mat1.t().mm(grad), alpha);
-- 
2.7.4
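
Note: a minimal sketch of the failure mode this patch addresses. This is not the script from #16562 (the sparse matrix, shapes, and values below are illustrative assumptions). Backward through torch.sparse.mm(S, D) with a column-major dense D routes into the strides[0] == 1 branch of mm_mat2_backward, which previously computed grad.t().mm(mat1) with a sparse mat1 on the right, i.e. an unsupported mm(dense, sparse) call.
```
import torch

# Illustrative values only; any sparse S and column-major dense D
# with requires_grad=True exercise the same code path.
i = torch.tensor([[0, 1, 2], [0, 1, 2]])
v = torch.ones(3)
S = torch.sparse_coo_tensor(i, v, (3, 3))  # sparse 3x3 identity

# t_() makes D column-major, so mm_mat2_backward takes the
# strides[0] == 1 fast path when computing D's gradient.
D = torch.randn(3, 3).t_().requires_grad_(True)

out = torch.sparse.mm(S, D)
out.sum().backward()  # raised an error before this patch
print(D.grad)         # S.t() @ ones(3, 3); all ones here since S is the identity
```
On the fix itself: since ATen has no mm(dense, sparse) kernel, the patch allocates the result transposed and lets s_native_addmm_out(r, t, mat1.t(), grad, alpha, 1) compute mat1.t() @ grad directly into it, so the returned gradient is still column-major as the fast path promises.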