From a21e256e8db77a339e0e1975a4ea41b671f3b0d9 Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Wed, 3 Apr 2019 11:18:05 -0700
Subject: [PATCH] Fix contiguous AD and Autogradzero inconsistency (#18633)

Summary:
Fixes #17962

Pull Request resolved: https://github.com/pytorch/pytorch/pull/18633

Differential Revision: D14700449

Pulled By: wanchaol

fbshipit-source-id: 3d15d67c01b69b28394a0f2f001db90ed9fd31dc
---
 test/common_methods_invocations.py        |  2 ++
 tools/jit/templates/register_aten_ops.cpp | 30 ++++++++++++++++++++----------
 torch/csrc/jit/autodiff.cpp               | 16 ++++++++--------
 torch/csrc/jit/symbolic_script.cpp        |  2 +-
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/test/common_methods_invocations.py b/test/common_methods_invocations.py
index d1f0809..738f680 100644
--- a/test/common_methods_invocations.py
+++ b/test/common_methods_invocations.py
@@ -538,6 +538,8 @@ def method_tests():
         ('norm', (), (3, 0, True), 'keepdim_3_dim_scalar', (), [1]),
         ('clone', (S, M, S), NO_ARGS),
         ('clone', (), NO_ARGS, 'scalar'),
+        ('contiguous', (S, S), NO_ARGS, '', (True,)),
+        ('contiguous', torch.randn(S, S).transpose(0, 1), NO_ARGS, 'not_contiguous', (True,)),
         ('dist', (S, S, S), ((S, S, S),)),
         ('dist', (S, S, S), ((S,),), 'broadcast_rhs'),
         ('dist', (S,), ((S, S, S),), 'broadcast_lhs'),
diff --git a/tools/jit/templates/register_aten_ops.cpp b/tools/jit/templates/register_aten_ops.cpp
index 7d8ba11..0fb2975 100644
--- a/tools/jit/templates/register_aten_ops.cpp
+++ b/tools/jit/templates/register_aten_ops.cpp
@@ -81,16 +81,16 @@ std::array<bool, N> as_bool_array(const std::vector<bool>& vec) {
 
 RegisterOperators reg({
     Operator(
-    "aten::get_device(Tensor self) -> int",
-    [](Stack & stack) {
-        autograd::profiler::RecordFunction record("get_device");
-        auto result = at::get_device(
-            (std::move(peek(stack, 0, 1))).toTensor()
-        );
-        drop(stack, 1);
-        pack(stack, std::move(result));
-        return 0;
-    }
+        "aten::get_device(Tensor self) -> int",
+        [](Stack & stack) {
+            autograd::profiler::RecordFunction record("get_device");
+            auto result = at::get_device(
+                (std::move(peek(stack, 0, 1))).toTensor()
+            );
+            drop(stack, 1);
+            pack(stack, std::move(result));
+            return 0;
+        }
     ),
     Operator(
         "aten::storage_offset(Tensor self) -> int",
@@ -102,6 +102,16 @@ RegisterOperators reg({
             return 0;
         }
     ),
+    Operator(
+        "aten::is_contiguous(Tensor self) -> bool",
+        [](Stack & stack) {
+            autograd::profiler::RecordFunction record("is_contiguous");
+            auto result = ((std::move(peek(stack, 0, 1))).toTensor()).is_contiguous();
+            drop(stack, 1);
+            pack(stack, std::move(result));
+            return 0;
+        }
+    ),
 
     // Generated operators
     ${constructors}
diff --git a/torch/csrc/jit/autodiff.cpp b/torch/csrc/jit/autodiff.cpp
index 61215a8..71be5fb 100644
--- a/torch/csrc/jit/autodiff.cpp
+++ b/torch/csrc/jit/autodiff.cpp
@@ -198,7 +198,7 @@ bool isDifferentiable(Graph& g) {
 // graph and a backward graph. Forward graph will be used to replace the node in
 // grad_desc.f, and backward graph will be used to construct GradOf(node) in
 // reverse_block. Grad_values(a.k.a gradOutputs) propagated through
-// node->owningGraph() in **reversed** order, thus GradientPair.forward ahould
+// node->owningGraph() in **reversed** order, thus GradientPair.forward should
 // be inserted **after** the node being replaced, so that we don't traverse the
 // graph infinite times.
 //
@@ -775,10 +775,10 @@ class GradientHelper {
 // later. It is ok to replace any backward function with known-zero inputs with
 // something that produces known-zero outputs. This function encloses each
 // know-linear backward function in a 'GradOf' sub-block so that we can perform
-// optimizations using this information. In particular, specializeUndef will
-// observe if all the inputs to the linear block are Undef, which the autograd
-// uses to represent zeros, and then propagate the undefs to the outputs of the
-// block.
+// optimizations using this information. In particular, specializeAutogradZero will
+// observe if all the inputs to the linear block are AutogradZeroTensor, which the
+// autograd uses to represent zeros, and then propagate the zeros to the outputs
+// of the block.
 static std::vector<Value*> linearGradientForNode(
     Node* node,
     ArrayRef<Value*> grad_values) {
@@ -795,7 +795,7 @@ static std::vector<Value*> linearGradientForNode(
   WithInsertPoint guard(block);
   auto results = GradientHelper(node).gradient(grad_values);
   return fmap(results, [block, linear](Value* grad) -> Value* {
-    if (!grad)
+    if (!grad || grad->mustBeNone())
       return nullptr;
     block->registerOutput(grad);
     return linear->addOutput()->copyMetadata(grad);
@@ -841,8 +841,8 @@ static ReverseDetails addReverseInline(Gradient& grad_desc) {
   const auto get_grad = [&](Value* v) -> Value* {
     auto it = grad_map.find(v);
     if (it == grad_map.end()) {
-      auto undef = graph.insertNode(graph.createAutogradZero());
-      std::tie(it, std::ignore) = grad_map.emplace(v, undef->output());
+      auto autograd_zero = graph.insertNode(graph.createAutogradZero());
+      std::tie(it, std::ignore) = grad_map.emplace(v, autograd_zero->output());
     }
     return it->second;
   };
diff --git a/torch/csrc/jit/symbolic_script.cpp b/torch/csrc/jit/symbolic_script.cpp
index a87470b..8b697e7 100644
--- a/torch/csrc/jit/symbolic_script.cpp
+++ b/torch/csrc/jit/symbolic_script.cpp
@@ -404,7 +404,7 @@ const std::vector<std::string> functions = {
 
         def contiguous(self):
            def backward(grad_output):
-                return None
+                return grad_output
 
            return self.contiguous(), backward
 
--
2.7.4
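Editor's note (not part of the patch): the symbolic_script.cpp change is the core of the fix. With the old "return None" backward, the JIT's autodiff treated the gradient of contiguous() as a known zero (AutogradZero), which disagrees with eager autograd, where contiguous() simply passes the incoming gradient through. The sketch below is a minimal eager-mode illustration of that pass-through behaviour, not part of the PyTorch test suite; it uses only standard torch APIs and mirrors the 'not_contiguous' entry added to method_tests() above.

    import torch

    # Illustrative check only: the backward of contiguous() should act as the
    # identity on grad_output, even when the input is a non-contiguous view.
    x = torch.randn(3, 4, dtype=torch.double, requires_grad=True)
    y = x.transpose(0, 1)          # non-contiguous view of x
    out = y.contiguous()           # materializes a contiguous copy
    grad = torch.ones_like(out)
    out.backward(grad)
    # The gradient flows back through the transpose unchanged.
    assert torch.equal(x.grad, grad.t())

    # gradcheck exercises the same path numerically, much as the added
    # ('contiguous', ..., 'not_contiguous', ...) entry does via the test harness.
    x2 = torch.randn(3, 4, dtype=torch.double, requires_grad=True)
    assert torch.autograd.gradcheck(lambda t: t.transpose(0, 1).contiguous(), (x2,))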