From: Sasha Rush
Date: Thu, 17 Jan 2019 18:04:51 +0000 (-0800)
Subject: Unify the shape notation for all of the pytorch modules (#15741)
X-Git-Tag: accepted/tizen/6.5/unified/20211028.231830~1792
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dbe6a7a9ff1a364a8706bf5df58a1ca96d2fd9da;p=platform%2Fupstream%2Fpytorch.git

Unify the shape notation for all of the pytorch modules (#15741)

Summary:
PR to update the shape notation for all of the torch.nn modules to take a
unified form. The goal is to make these definitions machine-readable and
checkable by unifying the style across all of the different modules.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15741

Differential Revision: D13709601

Pulled By: ezyang

fbshipit-source-id: fb89a03903fdf0cd0dcf76f3e469b8582b2f3634
---

diff --git a/torch/nn/modules/activation.py b/torch/nn/modules/activation.py
index 91bc013..d4d151e 100644
--- a/torch/nn/modules/activation.py
+++ b/torch/nn/modules/activation.py
@@ -439,9 +439,9 @@ class GLU(Module):
         dim (int): the dimension on which to split the input. Default: -1

     Shape:
-        - Input: :math:`(*, N, *)` where `*` means, any number of additional
+        - Input: :math:`(\ast_1, N, \ast_2)` where :math:`\ast_1` and :math:`\ast_2` mean any number of additional
           dimensions
-        - Output: :math:`(*, N / 2, *)`
+        - Output: :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`

     Examples::

@@ -794,8 +794,9 @@ class Softmin(Module):
         \text{Softmin}(x_{i}) = \frac{\exp(-x_i)}{\sum_j \exp(-x_j)}

     Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input

     Arguments:
         dim (int): A dimension along which Softmin will be computed (so every slice
@@ -834,8 +835,9 @@ class Softmax(Module):
         \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}

     Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input

     Returns:
         a Tensor of the same dimension and shape as the input with
@@ -910,8 +912,9 @@ class LogSoftmax(Module):
         \text{LogSoftmax}(x_{i}) = \log\left(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} \right)

     Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input

     Arguments:
         dim (int): A dimension along which Softmax will be computed (so every slice
diff --git a/torch/nn/modules/adaptive.py b/torch/nn/modules/adaptive.py
index 55e1630..d3cfb7b 100644
--- a/torch/nn/modules/adaptive.py
+++ b/torch/nn/modules/adaptive.py
@@ -89,8 +89,8 @@ class AdaptiveLogSoftmaxWithLoss(Module):
     Shape:
         - input: :math:`(N, in\_features)`
         - target: :math:`(N)` where each value satisfies :math:`0 <= target[i] <= n\_classes`
-        - output: :math:`(N)`
-        - loss: ``Scalar``
+        - output1: :math:`(N)`
+        - output2: ``Scalar``

     .. _Efficient softmax approximation for GPUs:
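As a quick illustration of the unified notation above, a minimal sketch (an editorial example, not part of the patch; the tensor sizes are arbitrary) of how the GLU and Softmax shape entries read at runtime::

    import torch
    import torch.nn as nn

    glu = nn.GLU(dim=-1)
    x = torch.randn(4, 3, 10)    # (ast_1, N, ast_2) with N = 10 on the split dimension
    print(glu(x).shape)          # torch.Size([4, 3, 5]) -> (ast_1, M, ast_2) with M = N / 2

    softmax = nn.Softmax(dim=1)
    y = torch.randn(2, 5, 7)     # (*): any shape
    print(softmax(y).shape)      # torch.Size([2, 5, 7]) -> (*), same shape as the input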
diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py
index 7f8181e..e75ed99 100644
--- a/torch/nn/modules/container.py
+++ b/torch/nn/modules/container.py
@@ -41,6 +41,11 @@ class Sequential(Module):
                   ('conv2', nn.Conv2d(20,64,5)),
                   ('relu2', nn.ReLU())
                 ]))
+
+    Shape:
+        - Input: :math:`(*)` where `*` means any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input
     """

     def __init__(self, *args):
diff --git a/torch/nn/modules/dropout.py b/torch/nn/modules/dropout.py
index e9ec872..6dab40f 100644
--- a/torch/nn/modules/dropout.py
+++ b/torch/nn/modules/dropout.py
@@ -40,8 +40,8 @@ class Dropout(_DropoutNd):
         inplace: If set to ``True``, will do this operation in-place. Default: ``False``

     Shape:
-        - Input: `Any`. Input can be of any shape
-        - Output: `Same`. Output is of the same shape as input
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input

     Examples::

@@ -173,8 +173,8 @@ class AlphaDropout(_DropoutNd):
             in-place

     Shape:
-        - Input: `Any`. Input can be of any shape
-        - Output: `Same`. Output is of the same shape as input
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input

     Examples::

diff --git a/torch/nn/modules/linear.py b/torch/nn/modules/linear.py
index 657f382..dd8a673 100644
--- a/torch/nn/modules/linear.py
+++ b/torch/nn/modules/linear.py
@@ -19,10 +19,10 @@ class Linear(Module):
             Default: ``True``

     Shape:
-        - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
-          additional dimensions
-        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-          are the same shape as the input.
+        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
+          additional dimensions and :math:`H_{in} = \text{in\_features}`
+        - Output: :math:`(N, *, H_{out})` where all but the last dimension
+          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

     Attributes:
         weight: the learnable weights of the module of shape
@@ -85,11 +85,12 @@ class Bilinear(Module):
             Default: ``True``

     Shape:
-        - Input: :math:`(N, *, \text{in1\_features})`, :math:`(N, *, \text{in2\_features})`
-          where :math:`*` means any number of additional dimensions. All but the last
-          dimension of the inputs should be the same.
-        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-          are the same shape as the input.
+        - Input1: :math:`(N, *, H_{in1})` where :math:`H_{in1}=\text{in1\_features}` and
+          :math:`*` means any number of additional dimensions. All but the last dimension
+          of the inputs should be the same.
+        - Input2: :math:`(N, *, H_{in2})` where :math:`H_{in2}=\text{in2\_features}`.
+        - Output: :math:`(N, *, H_{out})` where :math:`H_{out}=\text{out\_features}`
+          and all but the last dimension are the same shape as the input.

     Attributes:
         weight: the learnable weights of the module of shape
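The Linear and Bilinear entries above are the template the rest of the notation follows, with named sizes such as :math:`H_{in}` bound to the constructor arguments. A minimal sketch (an editorial example, not part of the patch; the sizes are arbitrary)::

    import torch
    import torch.nn as nn

    linear = nn.Linear(in_features=8, out_features=3)
    x = torch.randn(32, 5, 8)          # (N, *, H_in) with H_in = in_features
    print(linear(x).shape)             # torch.Size([32, 5, 3]) -> (N, *, H_out)

    bilinear = nn.Bilinear(in1_features=8, in2_features=6, out_features=3)
    x1 = torch.randn(32, 5, 8)         # Input1: (N, *, H_in1)
    x2 = torch.randn(32, 5, 6)         # Input2: (N, *, H_in2), same leading dims as x1
    print(bilinear(x1, x2).shape)      # torch.Size([32, 5, 3]) -> (N, *, H_out)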
diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py
index 900756b..e0c0c99 100644
--- a/torch/nn/modules/loss.py
+++ b/torch/nn/modules/loss.py
@@ -270,6 +270,13 @@ class PoissonNLLLoss(_Loss):
         >>> target = torch.randn(5, 2)
         >>> output = loss(log_input, target)
         >>> output.backward()
+
+    Shape:
+        - Input: :math:`(N, *)` where `*` means any number of additional
+          dimensions
+        - Target: :math:`(N, *)`, same shape as the input
+        - Output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
+          the same shape as the input
     """
     __constants__ = ['log_input', 'full', 'eps', 'reduction']

@@ -350,10 +357,10 @@ class KLDivLoss(_Loss):

     Shape:
-        - input: :math:`(N, *)` where `*` means, any number of additional
+        - Input: :math:`(N, *)` where `*` means any number of additional
           dimensions
-        - target: :math:`(N, *)`, same shape as the input
-        - output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
+        - Target: :math:`(N, *)`, same shape as the input
+        - Output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
           the same shape as the input

     """
@@ -571,6 +578,8 @@ class BCEWithLogitsLoss(_Loss):
         - Input: :math:`(N, *)` where `*` means, any number of additional
           dimensions
         - Target: :math:`(N, *)`, same shape as the input
+        - Output: scalar. If `reduce` is ``False``, then :math:`(N, *)`, same
+          shape as input.

     Examples::

@@ -640,8 +649,9 @@ class HingeEmbeddingLoss(_Loss):
             specifying either of those two args will override :attr:`reduction`. Default: 'mean'

     Shape:
-        - Input: Tensor of arbitrary shape. The sum operation operates over all the elements.
-        - Target: Same shape as input.
+        - Input: :math:`(*)` where `*` means any number of dimensions. The sum operation
+          operates over all the elements.
+        - Target: :math:`(*)`, same shape as the input
         - Output: scalar. If reduce is ``False``, then same shape as the input
     """
     __constants__ = ['margin', 'reduction']
@@ -797,8 +807,9 @@ class SoftMarginLoss(_Loss):
             specifying either of those two args will override :attr:`reduction`. Default: 'mean'

     Shape:
-        - Input: Tensor of arbitrary shape.
-        - Target: Same shape as input.
+        - Input: :math:`(*)` where `*` means any number of additional
+          dimensions
+        - Target: :math:`(*)`, same shape as the input
         - Output: scalar. If reduce is ``False``, then same shape as the input
     """
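The loss entries follow the same pattern: Output is a scalar under the default reduction and has the input shape when reduction is disabled. A minimal sketch (an editorial example, not part of the patch; it uses the newer ``reduction='none'`` spelling, which corresponds to the ``reduce=False`` wording in the docstrings)::

    import torch
    import torch.nn as nn

    inp = torch.randn(4, 7)               # Input: (N, *)
    tgt = torch.empty(4, 7).random_(2)    # Target: (N, *), same shape as the input

    print(nn.BCEWithLogitsLoss()(inp, tgt).shape)                  # torch.Size([]) -> scalar
    print(nn.BCEWithLogitsLoss(reduction='none')(inp, tgt).shape)  # torch.Size([4, 7]) -> (N, *)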
diff --git a/torch/nn/modules/normalization.py b/torch/nn/modules/normalization.py
index 8ef72ca..a21b858 100644
--- a/torch/nn/modules/normalization.py
+++ b/torch/nn/modules/normalization.py
@@ -25,8 +25,8 @@ class LocalResponseNorm(Module):
         k: additive factor. Default: 1

     Shape:
-        - Input: :math:`(N, C, ...)`
-        - Output: :math:`(N, C, ...)` (same shape as input)
+        - Input: :math:`(N, C, *)`
+        - Output: :math:`(N, C, *)` (same shape as input)

     Examples::

@@ -188,8 +188,8 @@ class GroupNorm(Module):
             and zeros (for biases). Default: ``True``.

     Shape:
-        - Input: :math:`(N, num\_channels, *)`
-        - Output: :math:`(N, num\_channels, *)` (same shape as input)
+        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
+        - Output: :math:`(N, C, *)` (same shape as input)

     Examples::

diff --git a/torch/nn/modules/pixelshuffle.py b/torch/nn/modules/pixelshuffle.py
index 971a226..79471a9 100644
--- a/torch/nn/modules/pixelshuffle.py
+++ b/torch/nn/modules/pixelshuffle.py
@@ -19,8 +19,9 @@ class PixelShuffle(Module):
         upscale_factor (int): factor to increase spatial resolution by

     Shape:
-        - Input: :math:`(N, C \times \text{upscale_factor}^2, H, W)`
-        - Output: :math:`(N, C, H \times \text{upscale_factor}, W \times \text{upscale_factor})`
+        - Input: :math:`(N, L, H_{in}, W_{in})` where :math:`L=C \times \text{upscale\_factor}^2`
+        - Output: :math:`(N, C, H_{out}, W_{out})` where :math:`H_{out} = H_{in} \times \text{upscale\_factor}`
+          and :math:`W_{out} = W_{in} \times \text{upscale\_factor}`

     Examples::

diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py
index 6578f8f..cc77dde 100644
--- a/torch/nn/modules/rnn.py
+++ b/torch/nn/modules/rnn.py
@@ -308,6 +308,18 @@ class RNN(RNNBase):
           Like *output*, the layers can be separated using
          ``h_n.view(num_layers, num_directions, batch, hidden_size)``.

+    Shape:
+        - Input1: :math:`(L, N, H_{in})` tensor containing input features where
+          :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length.
+        - Input2: :math:`(S, N, H_{out})` tensor containing the initial hidden state
+          for each element in the batch, where :math:`H_{out}=\text{hidden\_size}` and
+          :math:`S=\text{num\_layers} * \text{num\_directions}`. If the RNN is bidirectional,
+          num_directions should be 2, else it should be 1. Defaults to zero if not provided.
+        - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}`
+        - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
     Attributes:
         weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
             of shape `(hidden_size * input_size)` for `k = 0`. Otherwise, the shape is
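The recurrent modules use numbered inputs and outputs; the GRU hunk below repeats the same convention. A minimal sketch for RNN (an editorial example, not part of the patch; the sizes are arbitrary)::

    import torch
    import torch.nn as nn

    L, N = 5, 3                                   # sequence length, batch size
    rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2, bidirectional=True)
    S = rnn.num_layers * 2                        # S = num_layers * num_directions

    x = torch.randn(L, N, 10)                     # Input1: (L, N, H_in)
    h0 = torch.zeros(S, N, 20)                    # Input2: (S, N, H_out)
    output, h_n = rnn(x, h0)
    print(output.shape)                           # torch.Size([5, 3, 40]) -> (L, N, H_all)
    print(h_n.shape)                              # torch.Size([4, 3, 20]) -> (S, N, H_out)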
@@ -525,6 +537,18 @@ class GRU(RNNBase):
           Like *output*, the layers can be separated using
           ``h_n.view(num_layers, num_directions, batch, hidden_size)``.

+    Shape:
+        - Input1: :math:`(L, N, H_{in})` tensor containing input features where
+          :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length.
+        - Input2: :math:`(S, N, H_{out})` tensor containing the initial hidden state
+          for each element in the batch, where :math:`H_{out}=\text{hidden\_size}` and
+          :math:`S=\text{num\_layers} * \text{num\_directions}`. If the RNN is bidirectional,
+          num_directions should be 2, else it should be 1. Defaults to zero if not provided.
+        - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}`
+        - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
     Attributes:
         weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
             (W_ir|W_iz|W_in), of shape `(3*hidden_size x input_size)`
@@ -632,6 +656,15 @@ class RNNCell(RNNCellBase):
     - **h'** of shape `(batch, hidden_size)`: tensor containing the next hidden state
       for each element in the batch

+    Shape:
+        - Input1: :math:`(N, H_{in})` tensor containing input features where
+          :math:`H_{in}` = `input_size`
+        - Input2: :math:`(N, H_{out})` tensor containing the initial hidden
+          state for each element in the batch where :math:`H_{out}` = `hidden_size`.
+          Defaults to zero if not provided.
+        - Output: :math:`(N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
     Attributes:
         weight_ih: the learnable input-hidden weights, of shape
            `(hidden_size x input_size)`
@@ -802,6 +835,15 @@ class GRUCell(RNNCellBase):
     - **h'** of shape `(batch, hidden_size)`: tensor containing the next hidden state
       for each element in the batch

+    Shape:
+        - Input1: :math:`(N, H_{in})` tensor containing input features where
+          :math:`H_{in}` = `input_size`
+        - Input2: :math:`(N, H_{out})` tensor containing the initial hidden
+          state for each element in the batch where :math:`H_{out}` = `hidden_size`.
+          Defaults to zero if not provided.
+        - Output: :math:`(N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
     Attributes:
         weight_ih: the learnable input-hidden weights, of shape
            `(3*hidden_size x input_size)`
diff --git a/torch/nn/modules/sparse.py b/torch/nn/modules/sparse.py
index 38670c6..054b0d8 100644
--- a/torch/nn/modules/sparse.py
+++ b/torch/nn/modules/sparse.py
@@ -33,9 +33,8 @@ class Embedding(Module):
                                     initialized from :math:`\mathcal{N}(0, 1)`

     Shape:
-
-        - Input: LongTensor of arbitrary shape containing the indices to extract
-        - Output: `(*, embedding_dim)`, where `*` is the input shape
+        - Input: :math:`(*)`, LongTensor of arbitrary shape containing the indices to extract
+        - Output: :math:`(*, H)`, where `*` is the input shape and :math:`H=\text{embedding\_dim}`

     .. note::
         Keep in mind that only a limited number of optimizers support