# Note that a single gradient entry can specify the gradient
# formula for multiple input names by using a key such as
# "input1, input2" (see atan2 for an example).
-# - An argument can be flagged as 'not_differentiable'.
+# - An argument can be flagged as 'non_differentiable'.
# In general there are 3 possibilities:
# 1. An argument has an entry with a specified gradient
# 2. An argument has an entry specified as not differentiable
# 3. An argument has no entry
-# Using the flag 'not_differentiable' resolves to the second case.
+# Using the flag 'non_differentiable' resolves to the second case.
# The second case was introduced to support arguments that are not
# differentiable, e.g. the IndexTensor 'indices' argument of 'embedding'.
# TODO: Determine whether case 3 and case 2 can be replaced by one concept.
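# For illustration only ('my_op' and 'my_op_backward' are hypothetical names,
# not entries in this file), the three cases could look like:
#   - name: my_op(Tensor self, Tensor mask, int64_t reduction)
#     self: my_op_backward(grad, self, mask)  # case 1: an explicit gradient formula
#     mask: non_differentiable                # case 2: flagged as non_differentiable
#     # 'reduction' has no entry at all       # case 3: no entry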
self: grad.reshape(self.sizes())
- name: _s_where(Tensor condition, Tensor self, Tensor other)
- condition: not_differentiable
+ condition: non_differentiable
self: where(condition, grad, zeros_like(grad))
other: where(condition, zeros_like(grad), grad)
target: binary_cross_entropy_with_logits_target_backward(grad, self, target, weight, pos_weight, reduction)
- name: embedding(Tensor weight, Tensor indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse)
- indices: not_differentiable
+ indices: non_differentiable
weight: embedding_backward(grad, indices, weight.size(0), padding_idx, scale_grad_by_freq, sparse)
- name: embedding_dense_backward(Tensor grad_output, Tensor indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq)
grad_output: embedding_dense_double_backward(grad, indices)
- name: _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, Tensor per_sample_weights)
- indices: not_differentiable
- offsets: not_differentiable
+ indices: non_differentiable
+ offsets: non_differentiable
weight: _embedding_bag_backward(grad, indices, offsets, result1, result2, result3, weight.size(0), scale_grad_by_freq, mode, sparse, per_sample_weights)
per_sample_weights: _embedding_bag_per_sample_weights_backward(grad, weight, indices, result1, mode)
- name: embedding_renorm_(Tensor self, Tensor indices, double max_norm, double norm_type)
- indices: not_differentiable
+ indices: non_differentiable
self: not_implemented("embedding_renorm")
- name: kl_div(Tensor self, Tensor target, int64_t reduction)
# Only the first three of the _cudnn_rnn outputs can have gradients.
# _cudnn_rnn outputs: (output, hy, cy, reserve, weight_buf)
- name: _cudnn_rnn(Tensor input, TensorList weight, int64_t weight_stride0, Tensor weight_buf, Tensor hx, Tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, IntArrayRef batch_sizes, Tensor dropout_state)
- dropout_state: not_differentiable
+ dropout_state: non_differentiable
output_differentiability: [True, True, True, False, False]
input, hx, cx, weight: "_cudnn_rnn_backward(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)"
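# For illustration only (a hypothetical entry, not one from this file): an op
# returning (value, workspace) whose second output should never receive a
# gradient can declare that up front, so no backward formula is needed for it:
#   - name: my_op(Tensor self)
#     output_differentiability: [True, False]
#     self: my_op_backward(grad, self)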
# How do you feel about pasting the declaration inside the autograd function...
-def create_autograd_function(name, derivatives, args_with_derivatives, not_differentiable_args_names,
+def create_autograd_function(name, derivatives, args_with_derivatives, non_differentiable_arg_names,
signature, declaration, output_differentiability):
op = to_camel_case(name) + 'Backward'
op = op.replace('ForwardBackward', 'Backward')
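# e.g. (illustrative) the name 'kl_div' yields 'KlDivBackward'; a name ending in
# '_forward' would first produce '...ForwardBackward' and is then collapsed to
# '...Backward' by the replace above.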
'op': op,
'declaration': declaration,
'args_with_derivatives': args_with_derivatives,
- 'not_differentiable_args_names': not_differentiable_args_names,
+ 'non_differentiable_arg_names': non_differentiable_arg_names,
'signature': signature,
'derivatives': derivatives,
'saved_inputs': all_saved_variables(derivatives, 'saved_inputs'),
# Set up the derivative information
derivatives = []
- not_differentiable_args_names = []
+ non_differentiable_arg_names = []
for raw_names in sorted(defn.keys()):
formula = defn[raw_names]
names = split_names(raw_names)
derivative = create_derivative(declaration['arguments'], declaration['returns'],
declaration['name'], formula, names)
- if formula.lower().strip() == 'not_differentiable':
+ if formula.lower().strip() == 'non_differentiable':
assert not any(isinstance(var_name, list) for var_name in derivative['var_names']), \
    "Variable names associated with a formula should be a flat list"
- not_differentiable_args_names += derivative['var_names']
+ non_differentiable_arg_names += derivative['var_names']
else:
derivatives.append(derivative)
- args_with_derivatives = list(filter(lambda x: x['name'] not in not_differentiable_args_names,
+ args_with_derivatives = list(filter(lambda x: x['name'] not in non_differentiable_arg_names,
args_with_derivatives))
# Test to see if the use of 'grads' makes sense.
check_grad_usage(defn_name, declaration, derivatives)
- return derivatives, args_with_derivatives, not_differentiable_args_names
+ return derivatives, args_with_derivatives, non_differentiable_arg_names
def unzip(xs):
return zip(*xs)
'Declarations.yaml ({})'
.format(i, defn_name, x, y))
- derivatives, args_with_derivatives, not_differentiable_args_names = set_up_derivatives(defn_name, defn, canonical)
- return create_autograd_function(defn_name, derivatives, args_with_derivatives, not_differentiable_args_names,
+ derivatives, args_with_derivatives, non_differentiable_arg_names = set_up_derivatives(defn_name, defn, canonical)
+ return create_autograd_function(defn_name, derivatives, args_with_derivatives, non_differentiable_arg_names,
signature, canonical, output_differentiability)