From 5fa78303edcfcc841c30ef27462f80904b3c79a2 Mon Sep 17 00:00:00 2001
From: Natalia Gimelshein
Date: Thu, 21 Feb 2019 14:35:20 -0800
Subject: [PATCH] fix double backward for half softmax/logsoftmax (#17330)

Summary:
Fix for #17261. SsnL, do you have tests for it in your other PR? If not, I'll add them to this one.
The example from #17261 no longer errors out (and the same holds for log_softmax).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17330

Differential Revision: D14171529

Pulled By: soumith

fbshipit-source-id: ee925233feb1b44ef9f1d757db59ca3601aadef2
---
 test/test_nn.py                 |  3 +--
 tools/autograd/derivatives.yaml | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/test/test_nn.py b/test/test_nn.py
index 0a5d070..2640666 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -2142,8 +2142,7 @@ class TestNN(NNTestCase):

     def _test_softmax_backward(self, device):
         if device.type == 'cuda':
-            dtypes = [torch.float]
-            # FIXME: add torch.half after https://github.com/pytorch/pytorch/issues/17261 is fixed
+            dtypes = [torch.float, torch.half]
         else:
             dtypes = [torch.float]
         # FIXME: add (10, 0) after https://github.com/pytorch/pytorch/issues/17262 is fixed
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index d690119..6dac188 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -214,10 +214,10 @@
   self: clamp_backward(grad, self, min, max)

 - name: clamp_min(Tensor self, Scalar min)
-  self: grad * (self >= min).type_as(grad)
+  self: grad * (self >= min).to(grad.dtype())

 - name: clamp_max(Tensor self, Scalar max)
-  self: grad * (self <= max).type_as(grad)
+  self: grad * (self <= max).to(grad.dtype())

 - name: clone(Tensor self)
   self: grad
@@ -1206,8 +1206,8 @@
   self: log_sigmoid_double_backward(grad * grad_output, self)

 - name: _log_softmax_backward_data(Tensor grad_output, Tensor output, int64_t dim, Tensor self)
-  grad_output: grad - (grad * output.exp()).sum(dim, true)
-  self: log_softmax_double_backward(grad, grad_output, dim, output).type_as(self)
+  grad_output: grad.to(output.dtype()) - (grad.to(output.dtype()) * output.exp()).sum(dim, true)
+  self: log_softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(self.dtype())

 - name: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope)
   grad_output: leaky_relu_backward(grad, self, negative_slope)
@@ -1270,8 +1270,8 @@
   self: softplus_double_backward(grad * grad_output, self, beta, threshold)

 - name: _softmax_backward_data(Tensor grad_output, Tensor output, int64_t dim, Tensor self)
-  grad_output: _softmax_backward_data(grad, output, dim, self)
-  self: softmax_double_backward(grad, grad_output, dim, output).type_as(self)
+  grad_output: _softmax_backward_data(grad.to(output.dtype()), output, dim, self)
+  self: softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(self.dtype())

 - name: soft_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int64_t reduction)
   grad_output: soft_margin_loss_double_backward_grad_output(grad, grad_output, self, target, reduction)
--
2.7.4
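
A minimal double-backward check of the kind the summary refers to, given as a sketch rather than the exact snippet from #17261; it assumes a CUDA device with half-precision support, and the tensor names are illustrative:

    import torch
    import torch.nn.functional as F

    # Half-precision input on CUDA; double backward through softmax raised a
    # dtype error before this patch (see #17261).
    x = torch.randn(4, 8, device='cuda', dtype=torch.half, requires_grad=True)
    y = F.softmax(x, dim=1)          # F.log_softmax(x, dim=1) exercises the same path
    grad_out = torch.randn_like(y)

    # First backward with create_graph=True so the backward graph is itself
    # differentiable.
    grad_x, = torch.autograd.grad(y, x, grad_out, create_graph=True)

    # Second (double) backward through the first gradient.
    grad_x.sum().backward()
    print(x.grad.dtype)              # torch.float16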