Do not modify saved variables in-place for spectral norm during power iteration ...
author     soulitzer <soulitzer@gmail.com>
           Tue, 24 Aug 2021 20:02:27 +0000 (13:02 -0700)
committer  Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
           Tue, 24 Aug 2021 20:08:59 +0000 (13:08 -0700)
Summary:
Interestingly enough, the original code did have a mechanism that aims to prevent this very issue, but it performs the clone AFTER modifying u and v in-place.
That doesn't work because the cloned u and v are later used in operations that save them for backward, and on the next forward pass the power iteration modifies those same cloned tensors in-place, invalidating what was saved.
So, if the idea is to avoid modifying saved variables in-place, we should clone u and v BEFORE they can be modified in-place again, i.e. clone them in forward() right before computing sigma, so the tensors saved for backward are never touched by the next power iteration.
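
To illustrate the failure mode, here is a minimal sketch (not taken from the PR; the
tensor names are made up) of how autograd's saved-variable check reacts when a tensor
saved for backward is later mutated in-place, and why cloning before the mutation
avoids the error:

import torch

# A tensor saved for backward is mutated in-place afterwards: backward() fails.
w = torch.randn(3, requires_grad=True)
u = torch.randn(3)
out = torch.dot(u, w)        # dot() saves `u` to compute w.grad
u.copy_(torch.randn(3))      # in-place update bumps `u`'s version counter
# out.backward()             # RuntimeError: one of the variables needed for
                             # gradient computation has been modified by an
                             # inplace operation

# Cloning BEFORE the in-place update keeps the saved tensor intact.
w2 = torch.randn(3, requires_grad=True)
u2 = torch.randn(3)
u_clone = u2.clone()
out2 = torch.dot(u_clone, w2)  # the clone is what gets saved for backward
u2.copy_(torch.randn(3))       # mutating `u2` no longer touches the graph
out2.backward()                # works; w2.grad equals the unchanged clone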

Pull Request resolved: https://github.com/pytorch/pytorch/pull/62293

Reviewed By: bdhirsh

Differential Revision: D30489750

Pulled By: soulitzer

fbshipit-source-id: cbe8dea885aef97adda8481f7a822e5bd91f7889

test/test_nn.py
torch/nn/utils/parametrizations.py

diff --git a/test/test_nn.py b/test/test_nn.py
index 07a2b48..43e105a 100644
@@ -4220,6 +4220,9 @@ class TestNN(NNTestCase):
                     out1 = wrapped_m(input)
                     return out0 + out1
 
+                # Make sure we can compute gradients wrt all the parameters in the case
+                # of double forward
+                fn(input.clone().requires_grad_()).sum().backward()
                 gradcheck(fn, (input.clone().requires_grad_(),), check_batched_grad=False)
 
                 # test removing
diff --git a/torch/nn/utils/parametrizations.py b/torch/nn/utils/parametrizations.py
index 7941f41..de3d5c7 100644
@@ -84,6 +84,7 @@ class _SpectralNorm(Module):
 
         # Precondition
         assert weight_mat.ndim > 1
+
         for _ in range(n_power_iterations):
             # Spectral norm of weight equals to `u^T W v`, where `u` and `v`
             # are the first left and right singular vectors.
@@ -92,9 +93,6 @@ class _SpectralNorm(Module):
                                   dim=0, eps=self.eps, out=self._u)   # type: ignore[has-type]
             self._v = F.normalize(torch.mv(weight_mat.t(), self._u),
                                   dim=0, eps=self.eps, out=self._v)   # type: ignore[has-type]
-        # See above on why we need to clone
-        self._u = self._u.clone(memory_format=torch.contiguous_format)
-        self._v = self._v.clone(memory_format=torch.contiguous_format)
 
     def forward(self, weight: torch.Tensor) -> torch.Tensor:
         if weight.ndim == 1:
@@ -104,10 +102,13 @@ class _SpectralNorm(Module):
             weight_mat = self._reshape_weight_to_matrix(weight)
             if self.training:
                 self._power_method(weight_mat, self.n_power_iterations)
+            # See above on why we need to clone
+            u = self._u.clone(memory_format=torch.contiguous_format)
+            v = self._v.clone(memory_format=torch.contiguous_format)
             # The proper way of computing this should be through F.bilinear, but
             # it seems to have some efficiency issues:
             # https://github.com/pytorch/pytorch/issues/58093
-            sigma = torch.dot(self._u, torch.mv(weight_mat, self._v))
+            sigma = torch.dot(u, torch.mv(weight_mat, v))
             return weight / sigma
 
     def right_inverse(self, value: torch.Tensor) -> torch.Tensor:
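
For context, the double-forward scenario exercised by the new test can be reproduced
standalone. A rough sketch (layer sizes and variable names here are illustrative, not
copied from test_nn.py):

import torch
import torch.nn as nn
from torch.nn.utils.parametrizations import spectral_norm

# Each forward pass re-runs the power iteration, which previously mutated in-place
# the u/v tensors that the earlier forward pass had saved for backward.
m = spectral_norm(nn.Linear(5, 7))
x = torch.randn(3, 5, requires_grad=True)

out = m(x) + m(x)      # two forward passes before backward
out.sum().backward()   # with this change, no "modified by an inplace operation" error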