Rename 'not_differentiable' to 'non_differentiable'. (#19272)
author     Gregory Chanan <gchanan@fb.com>
           Fri, 19 Apr 2019 13:58:41 +0000 (06:58 -0700)
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
           Fri, 19 Apr 2019 14:07:55 +0000 (07:07 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19272
ghimport-source-id: 755e91efa68c5a1c4377a6853f21b3eee3f8cab5

Differential Revision: D15003381

Pulled By: gchanan

fbshipit-source-id: 54db27c5c5e65acf65821543db3217de9dd9bdb5

tools/autograd/derivatives.yaml
tools/autograd/gen_variable_type.py
tools/autograd/load_derivatives.py

diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index 4d7b568..9845551 100644
@@ -8,12 +8,12 @@
 #     Note that a single gradient entry can specify the gradient
 #     formula for multiple input names, by specifying a key
 #     "input1, input2" (see atan2 for an example).
-#   - An argument can be flagged as 'not_differentiable'.
+#   - An argument can be flagged as 'non_differentiable'.
 #     In general there are 3 possibilities:
 #       1. An argument has an entry with a specified gradient
 #       2. An argument has an entry specified as not differentiable
 #       3. An argument has no entry
-#     Using the flag 'not_differentiable' resolves to the second case.
+#     Using the flag 'non_differentiable' resolves to the second case.
 #     The second case was introduced in support for arguments of
 #     type e.g. IndexTensor for 'embedding', that are not differentiable.
 #     TODO: Determine whether case 3 and case 2 can be replaced by one concept.
   self: grad.reshape(self.sizes())
 
 - name: _s_where(Tensor condition, Tensor self, Tensor other)
-  condition: not_differentiable
+  condition: non_differentiable
   self: where(condition, grad, zeros_like(grad))
   other: where(condition, zeros_like(grad), grad)
 
   target: binary_cross_entropy_with_logits_target_backward(grad, self, target, weight, pos_weight, reduction)
 
 - name: embedding(Tensor weight, Tensor indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse)
-  indices: not_differentiable
+  indices: non_differentiable
   weight: embedding_backward(grad, indices, weight.size(0), padding_idx, scale_grad_by_freq, sparse)
 
 - name: embedding_dense_backward(Tensor grad_output, Tensor indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq)
   grad_output: embedding_dense_double_backward(grad, indices)
 
 - name: _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, Tensor per_sample_weights)
-  indices: not_differentiable
-  offsets: not_differentiable
+  indices: non_differentiable
+  offsets: non_differentiable
   weight: _embedding_bag_backward(grad, indices, offsets, result1, result2, result3, weight.size(0), scale_grad_by_freq, mode, sparse, per_sample_weights)
   per_sample_weights: _embedding_bag_per_sample_weights_backward(grad, weight, indices, result1, mode)
 
 - name: embedding_renorm_(Tensor self, Tensor indices, double max_norm, double norm_type)
-  indices: not_differentiable
+  indices: non_differentiable
   self: not_implemented("embedding_renorm")
 
 - name: kl_div(Tensor self, Tensor target, int64_t reduction)
 # Only first three of _cudnn_rnn outputs can have gradients.
 # _cudnn_rnn outputs: (output, hy, cy, reserve, weight_buf)
 - name: _cudnn_rnn(Tensor input, TensorList weight, int64_t weight_stride0, Tensor weight_buf, Tensor hx, Tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, Tensor dropout_state)
-  dropout_state: not_differentiable
+  dropout_state: non_differentiable
   output_differentiability: [True, True, True, False, False]
   input, hx, cx, weight: "_cudnn_rnn_backward(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)"
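
The comment block in the first hunk above enumerates three cases for an argument in derivatives.yaml: an entry with a gradient formula, an entry flagged 'non_differentiable', or no entry at all. As a rough illustration only, not PyTorch's actual codegen, a hypothetical classifier for a single entry could look like this (the dict layout and helper name are invented):

# Hypothetical sketch (not PyTorch's real codegen): classify the arguments of
# one derivatives.yaml entry into the three cases described in the comment
# block above.
def classify_arguments(entry, all_arg_names):
    # 'entry' maps an argument name (or "input1, input2") to its gradient formula.
    with_formula, non_differentiable = set(), set()
    for raw_names, formula in entry.items():
        names = [n.strip() for n in raw_names.split(',')]
        if formula.strip().lower() == 'non_differentiable':   # case 2
            non_differentiable.update(names)
        else:                                                  # case 1
            with_formula.update(names)
    no_entry = set(all_arg_names) - with_formula - non_differentiable  # case 3
    return with_formula, non_differentiable, no_entry

# Mirrors the _s_where entry shown in the hunk above.
entry = {
    'condition': 'non_differentiable',
    'self': 'where(condition, grad, zeros_like(grad))',
    'other': 'where(condition, zeros_like(grad), grad)',
}
print(classify_arguments(entry, ['condition', 'self', 'other']))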
 
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index ac7a041..f1a8572 100644
@@ -508,7 +508,7 @@ def emit_body(declaration):
     inputs = [arg for arg in arguments if not arg.get('output', False)]
     differentiable_inputs = list(filter(is_differentiable, inputs))
     args_with_derivatives = find_args_with_derivatives(differentiable_inputs)
-    not_differentiable_args_names = func['not_differentiable_args_names'] if func else []
+    non_differentiable_arg_names = func['non_differentiable_arg_names'] if func else []
     candidate_differentiable_outputs = list(filter(is_differentiable, returns))
 
     if func is not None and func.get('output_differentiability') is not None:
@@ -625,7 +625,7 @@ def emit_body(declaration):
             if arg in args_with_derivatives:
                 continue
             name = arg['name']
-            if name in not_differentiable_args_names:
+            if name in non_differentiable_arg_names:
                 continue
             if name == 'output':
                 # Double-backwards definitions sometimes take in 'input' and
diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py
index 1a3bf1b..83f731b 100644
@@ -29,7 +29,7 @@ def load_derivatives(path, declarations):
 
 
 # How do you feel about pasting declaration inside autograd function...
-def create_autograd_function(name, derivatives, args_with_derivatives, not_differentiable_args_names,
+def create_autograd_function(name, derivatives, args_with_derivatives, non_differentiable_arg_names,
                              signature, declaration, output_differentiability):
     op = to_camel_case(name) + 'Backward'
     op = op.replace('ForwardBackward', 'Backward')
@@ -38,7 +38,7 @@ def create_autograd_function(name, derivatives, args_with_derivatives, not_diffe
         'op': op,
         'declaration': declaration,
         'args_with_derivatives': args_with_derivatives,
-        'not_differentiable_args_names': not_differentiable_args_names,
+        'non_differentiable_arg_names': non_differentiable_arg_names,
         'signature': signature,
         'derivatives': derivatives,
         'saved_inputs': all_saved_variables(derivatives, 'saved_inputs'),
@@ -144,26 +144,26 @@ def process_definition(defn, declarations_by_signature):
 
         # Set up the derivative information
         derivatives = []
-        not_differentiable_args_names = []
+        non_differentiable_arg_names = []
         for raw_names in sorted(defn.keys()):
             formula = defn[raw_names]
             names = split_names(raw_names)
             derivative = create_derivative(declaration['arguments'], declaration['returns'],
                                            declaration['name'], formula, names)
-            if formula.lower().strip() == 'not_differentiable':
+            if formula.lower().strip() == 'non_differentiable':
                 assert not sum([type(var_name) == list
                                 for var_name in derivative['var_names']]), \
                     "Variable names associated to a formula should be a flat list"
-                not_differentiable_args_names += derivative['var_names']
+                non_differentiable_arg_names += derivative['var_names']
             else:
                 derivatives.append(derivative)
-        args_with_derivatives = list(filter(lambda x: x['name'] not in not_differentiable_args_names,
+        args_with_derivatives = list(filter(lambda x: x['name'] not in non_differentiable_arg_names,
                                             args_with_derivatives))
 
         # Test to see if the use of 'grads' makes sense.
         check_grad_usage(defn_name, declaration, derivatives)
 
-        return derivatives, args_with_derivatives, not_differentiable_args_names
+        return derivatives, args_with_derivatives, non_differentiable_arg_names
 
     def unzip(xs):
         return zip(*xs)
@@ -206,8 +206,8 @@ def process_definition(defn, declarations_by_signature):
                                'Declarations.yaml ({})'
                                .format(i, defn_name, x, y))
 
-    derivatives, args_with_derivatives, not_differentiable_args_names = set_up_derivatives(defn_name, defn, canonical)
-    return create_autograd_function(defn_name, derivatives, args_with_derivatives, not_differentiable_args_names,
+    derivatives, args_with_derivatives, non_differentiable_arg_names = set_up_derivatives(defn_name, defn, canonical)
+    return create_autograd_function(defn_name, derivatives, args_with_derivatives, non_differentiable_arg_names,
                                     signature, canonical, output_differentiability)
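
Tying the two Python hunks together: load_derivatives.py removes names flagged 'non_differentiable' from args_with_derivatives and records them in non_differentiable_arg_names, which gen_variable_type.py's emit_body later uses to skip those arguments. A condensed, hypothetical sketch of that downstream consumption (simplified argument records, invented helper names; not the real modules) follows:

# Condensed, hypothetical sketch of how non_differentiable_arg_names is
# consumed downstream.  Argument records are plain dicts here; the real code
# carries much more metadata.
def filter_args_with_derivatives(args_with_derivatives, non_differentiable_arg_names):
    # Mirrors load_derivatives.py: drop candidates whose formula was the
    # 'non_differentiable' flag rather than a real gradient expression.
    return [a for a in args_with_derivatives
            if a['name'] not in non_differentiable_arg_names]

def remaining_args(arguments, args_with_derivatives, non_differentiable_arg_names):
    # Mirrors the loop in emit_body: skip arguments that already carry a
    # derivative and arguments explicitly flagged as non-differentiable.
    return [arg for arg in arguments
            if arg not in args_with_derivatives
            and arg['name'] not in non_differentiable_arg_names]

args = [{'name': 'weight'}, {'name': 'indices'}]
with_derivs = filter_args_with_derivatives(args, ['indices'])
print([a['name'] for a in with_derivs])                # ['weight']
print(remaining_args(args, with_derivs, ['indices']))  # []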