Unify the shape notation for all of the pytorch modules (#15741)

author Sasha Rush <srush@seas.harvard.edu>

Thu, 17 Jan 2019 18:04:51 +0000 (10:04 -0800)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Thu, 17 Jan 2019 18:32:14 +0000 (10:32 -0800)
author Sasha Rush <srush@seas.harvard.edu>
Thu, 17 Jan 2019 18:04:51 +0000 (10:04 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 17 Jan 2019 18:32:14 +0000 (10:32 -0800)
diff --git a/torch/nn/modules/activation.py b/torch/nn/modules/activation.py

index 91bc013..d4d151e 100644 (file)
--- a/torch/nn/modules/activation.py
+++ b/torch/nn/modules/activation.py
@@ -439,9 +439,9 @@ class GLU(Module):
          dim (int): the dimension on which to split the input. Default: -1
  
      Shape:
-        - Input: :math:`(*, N, *)` where `*` means, any number of additional
+        - Input: :math:`(\ast_1, N, \ast_2)` where `*` means, any number of additional
            dimensions
-        - Output: :math:`(*, N / 2, *)`
+        - Output: :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`
  
      Examples::
  
@@ -794,8 +794,9 @@ class Softmin(Module):
          \text{Softmin}(x_{i}) = \frac{\exp(-x_i)}{\sum_j \exp(-x_j)}
  
      Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means, any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input
  
      Arguments:
          dim (int): A dimension along which Softmin will be computed (so every slice
@@ -834,8 +835,9 @@ class Softmax(Module):
          \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
  
      Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means, any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input
  
      Returns:
          a Tensor of the same dimension and shape as the input with
@@ -910,8 +912,9 @@ class LogSoftmax(Module):
          \text{LogSoftmax}(x_{i}) = \log\left(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} \right)
  
      Shape:
-        - Input: any shape
-        - Output: same as input
+        - Input: :math:`(*)` where `*` means, any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input
  
      Arguments:
          dim (int): A dimension along which Softmax will be computed (so every slice
diff --git a/torch/nn/modules/adaptive.py b/torch/nn/modules/adaptive.py

index 55e1630..d3cfb7b 100644 (file)
--- a/torch/nn/modules/adaptive.py
+++ b/torch/nn/modules/adaptive.py
@@ -89,8 +89,8 @@ class AdaptiveLogSoftmaxWithLoss(Module):
      Shape:
          - input: :math:`(N, in\_features)`
          - target: :math:`(N)` where each value satisfies :math:`0 <= target[i] <= n\_classes`
-        - output: :math:`(N)`
-        - loss: ``Scalar``
+        - output1: :math:`(N)`
+        - output2: ``Scalar``
  
  
      .. _Efficient softmax approximation for GPUs:
diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py

index 7f8181e..e75ed99 100644 (file)
--- a/torch/nn/modules/container.py
+++ b/torch/nn/modules/container.py
@@ -41,6 +41,11 @@ class Sequential(Module):
                    ('conv2', nn.Conv2d(20,64,5)),
                    ('relu2', nn.ReLU())
                  ]))
+
+    Shape:
+        - Input: :math:`(*)` where `*` means, any number of additional
+          dimensions
+        - Output: :math:`(*)`, same shape as the input
      """
  
      def __init__(self, *args):
diff --git a/torch/nn/modules/dropout.py b/torch/nn/modules/dropout.py

index e9ec872..6dab40f 100644 (file)
--- a/torch/nn/modules/dropout.py
+++ b/torch/nn/modules/dropout.py
@@ -40,8 +40,8 @@ class Dropout(_DropoutNd):
          inplace: If set to ``True``, will do this operation in-place. Default: ``False``
  
      Shape:
-        - Input: `Any`. Input can be of any shape
-        - Output: `Same`. Output is of the same shape as input
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input
  
      Examples::
  
@@ -173,8 +173,8 @@ class AlphaDropout(_DropoutNd):
              in-place
  
      Shape:
-        - Input: `Any`. Input can be of any shape
-        - Output: `Same`. Output is of the same shape as input
+        - Input: :math:`(*)`. Input can be of any shape
+        - Output: :math:`(*)`. Output is of the same shape as input
  
      Examples::
  
diff --git a/torch/nn/modules/linear.py b/torch/nn/modules/linear.py

index 657f382..dd8a673 100644 (file)
--- a/torch/nn/modules/linear.py
+++ b/torch/nn/modules/linear.py
@@ -19,10 +19,10 @@ class Linear(Module):
              Default: ``True``
  
      Shape:
-        - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
-          additional dimensions
-        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-          are the same shape as the input.
+        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
+          additional dimensions and :math:`H_{in} = \text{in\_features}`
+        - Output: :math:`(N, *, H_{out})` where all but the last dimension
+          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
  
      Attributes:
          weight: the learnable weights of the module of shape
@@ -85,11 +85,12 @@ class Bilinear(Module):
              Default: ``True``
  
      Shape:
-        - Input: :math:`(N, *, \text{in1\_features})`, :math:`(N, *, \text{in2\_features})`
-          where :math:`*` means any number of additional dimensions. All but the last
-          dimension of the inputs should be the same.
-        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
-          are the same shape as the input.
+        - Input1: :math:`(N, *, H_{in1})` where :math:`H_{in1}=\text{in1\_features}` and
+          :math:`*` means any number of additional dimensions. All but the last dimension
+          of the inputs should be the same.
+        - Input2: :math:`(N, *, H_{in2})` where :math:`H_{in2}=\text{in2\_features}`.
+        - Output: :math:`(N, *, H_{out})` where :math:`H_{out}=\text{out\_features}`
+          and all but the last dimension are the same shape as the input.
  
      Attributes:
          weight: the learnable weights of the module of shape
diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py

index 900756b..e0c0c99 100644 (file)
--- a/torch/nn/modules/loss.py
+++ b/torch/nn/modules/loss.py
@@ -270,6 +270,13 @@ class PoissonNLLLoss(_Loss):
          >>> target = torch.randn(5, 2)
          >>> output = loss(log_input, target)
          >>> output.backward()
+
+    Shape:
+        - Input: :math:`(N, *)` where `*` means, any number of additional
+          dimensions
+        - Target: :math:`(N, *)`, same shape as the input
+        - Output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
+          the same shape as the input
      """
      __constants__ = ['log_input', 'full', 'eps', 'reduction']
  
@@ -350,10 +357,10 @@ class KLDivLoss(_Loss):
  
  
      Shape:
-        - input: :math:`(N, *)` where `*` means, any number of additional
+        - Input: :math:`(N, *)` where `*` means, any number of additional
            dimensions
-        - target: :math:`(N, *)`, same shape as the input
-        - output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
+        - Target: :math:`(N, *)`, same shape as the input
+        - Output: scalar by default. If `reduce` is ``False``, then :math:`(N, *)`,
            the same shape as the input
  
      """
@@ -571,6 +578,8 @@ class BCEWithLogitsLoss(_Loss):
           - Input: :math:`(N, *)` where `*` means, any number of additional
             dimensions
           - Target: :math:`(N, *)`, same shape as the input
+         - Output: scalar. If `reduce` is False, then :math:`(N, *)`, same
+           shape as input.
  
       Examples::
  
@@ -640,8 +649,9 @@ class HingeEmbeddingLoss(_Loss):
              specifying either of those two args will override :attr:`reduction`. Default: 'mean'
  
      Shape:
-        - Input: Tensor of arbitrary shape. The sum operation operates over all the elements.
-        - Target: Same shape as input.
+        - Input: :math:`(*)` where `*` means, any number of dimensions. The sum operation
+          operates over all the elements.
+        - Target: :math:`(*)`, same shape as the input
          - Output: scalar. If reduce is ``False``, then same shape as the input
      """
      __constants__ = ['margin', 'reduction']
@@ -797,8 +807,9 @@ class SoftMarginLoss(_Loss):
              specifying either of those two args will override :attr:`reduction`. Default: 'mean'
  
      Shape:
-        - Input: Tensor of arbitrary shape.
-        - Target: Same shape as input.
+        - Input: :math:`(*)` where `*` means, any number of additional
+          dimensions
+        - Target: :math:`(*)`, same shape as the input
          - Output: scalar. If reduce is ``False``, then same shape as the input
  
      """
diff --git a/torch/nn/modules/normalization.py b/torch/nn/modules/normalization.py

index 8ef72ca..a21b858 100644 (file)
--- a/torch/nn/modules/normalization.py
+++ b/torch/nn/modules/normalization.py
@@ -25,8 +25,8 @@ class LocalResponseNorm(Module):
          k: additive factor. Default: 1
  
      Shape:
-        - Input: :math:`(N, C, ...)`
-        - Output: :math:`(N, C, ...)` (same shape as input)
+        - Input: :math:`(N, C, *)`
+        - Output: :math:`(N, C, *)` (same shape as input)
  
      Examples::
  
@@ -188,8 +188,8 @@ class GroupNorm(Module):
              and zeros (for biases). Default: ``True``.
  
      Shape:
-        - Input: :math:`(N, num\_channels, *)`
-        - Output: :math:`(N, num\_channels, *)` (same shape as input)
+        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
+        - Output: :math:`(N, C, *)` (same shape as input)
  
      Examples::
  
diff --git a/torch/nn/modules/pixelshuffle.py b/torch/nn/modules/pixelshuffle.py

index 971a226..79471a9 100644 (file)
--- a/torch/nn/modules/pixelshuffle.py
+++ b/torch/nn/modules/pixelshuffle.py
@@ -19,8 +19,9 @@ class PixelShuffle(Module):
          upscale_factor (int): factor to increase spatial resolution by
  
      Shape:
-        - Input: :math:`(N, C \times \text{upscale_factor}^2, H, W)`
-        - Output: :math:`(N, C, H \times \text{upscale_factor}, W \times \text{upscale_factor})`
+        - Input: :math:`(N, L, H_{in}, W_{in})` where :math:`L=C \times \text{upscale\_factor}^2`
+        - Output: :math:`(N, C, H_{out}, W_{out})` where :math:`H_{out} = H_{in} \times \text{upscale\_factor}`
+          and :math:`W_{out} = W_{in} \times \text{upscale\_factor}`
  
      Examples::
  
diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py

index 6578f8f..cc77dde 100644 (file)
--- a/torch/nn/modules/rnn.py
+++ b/torch/nn/modules/rnn.py
@@ -308,6 +308,18 @@ class RNN(RNNBase):
            Like *output*, the layers can be separated using
            ``h_n.view(num_layers, num_directions, batch, hidden_size)``.
  
+    Shape:
+        - Input1: :math:`(L, N, H_{in})` tensor containing input features where
+          :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length.
+        - Input2: :math:`(S, N, H_{out})` tensor
+          containing the initial hidden state for each element in the batch,
+          where :math:`H_{out}=\text{hidden\_size}` and
+          :math:`S=\text{num\_layers} * \text{num\_directions}`. Defaults to zero if not provided.
+          If the RNN is bidirectional, num_directions should be 2, else it should be 1.
+        - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}`
+        - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
      Attributes:
          weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
              of shape `(hidden_size * input_size)` for `k = 0`. Otherwise, the shape is
@@ -525,6 +537,18 @@ class GRU(RNNBase):
            Like *output*, the layers can be separated using
            ``h_n.view(num_layers, num_directions, batch, hidden_size)``.
  
+    Shape:
+        - Input1: :math:`(L, N, H_{in})` tensor containing input features where
+          :math:`H_{in}=\text{input\_size}` and `L` represents a sequence length.
+        - Input2: :math:`(S, N, H_{out})` tensor
+          containing the initial hidden state for each element in the batch,
+          where :math:`H_{out}=\text{hidden\_size}` and
+          :math:`S=\text{num\_layers} * \text{num\_directions}`. Defaults to zero if not provided.
+          If the RNN is bidirectional, num_directions should be 2, else it should be 1.
+        - Output1: :math:`(L, N, H_{all})` where :math:`H_{all}=\text{num\_directions} * \text{hidden\_size}`
+        - Output2: :math:`(S, N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
      Attributes:
          weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
              (W_ir|W_iz|W_in), of shape `(3*hidden_size x input_size)`
@@ -632,6 +656,15 @@ class RNNCell(RNNCellBase):
          - **h'** of shape `(batch, hidden_size)`: tensor containing the next hidden state
            for each element in the batch
  
+    Shape:
+        - Input1: :math:`(N, H_{in})` tensor containing input features where
+          :math:`H_{in}` = `input_size`
+        - Input2: :math:`(N, H_{out})` tensor containing the initial hidden
+          state for each element in the batch where :math:`H_{out}` = `hidden_size`.
+          Defaults to zero if not provided.
+        - Output: :math:`(N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
      Attributes:
          weight_ih: the learnable input-hidden weights, of shape
              `(hidden_size x input_size)`
@@ -802,6 +835,15 @@ class GRUCell(RNNCellBase):
          - **h'** of shape `(batch, hidden_size)`: tensor containing the next hidden state
            for each element in the batch
  
+    Shape:
+        - Input1: :math:`(N, H_{in})` tensor containing input features where
+          :math:`H_{in}` = `input_size`
+        - Input2: :math:`(N, H_{out})` tensor containing the initial hidden
+          state for each element in the batch where :math:`H_{out}` = `hidden_size`.
+          Defaults to zero if not provided.
+        - Output: :math:`(N, H_{out})` tensor containing the next hidden state
+          for each element in the batch
+
      Attributes:
          weight_ih: the learnable input-hidden weights, of shape
              `(3*hidden_size x input_size)`
diff --git a/torch/nn/modules/sparse.py b/torch/nn/modules/sparse.py

index 38670c6..054b0d8 100644 (file)
--- a/torch/nn/modules/sparse.py
+++ b/torch/nn/modules/sparse.py
@@ -33,9 +33,8 @@ class Embedding(Module):
                           initialized from :math:`\mathcal{N}(0, 1)`
  
      Shape:
-
-        - Input: LongTensor of arbitrary shape containing the indices to extract
-        - Output: `(*, embedding_dim)`, where `*` is the input shape
+        - Input: :math:`(*)`, LongTensor of arbitrary shape containing the indices to extract
+        - Output: :math:`(*, H)`, where `*` is the input shape and :math:`H=\text{embedding\_dim}`
  
      .. note::
          Keep in mind that only a limited number of optimizers support
author	Sasha Rush <srush@seas.harvard.edu>
	Thu, 17 Jan 2019 18:04:51 +0000 (10:04 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Thu, 17 Jan 2019 18:32:14 +0000 (10:32 -0800)
torch/nn/modules/activation.py		patch \| blob \| history
torch/nn/modules/adaptive.py		patch \| blob \| history
torch/nn/modules/container.py		patch \| blob \| history
torch/nn/modules/dropout.py		patch \| blob \| history
torch/nn/modules/linear.py		patch \| blob \| history
torch/nn/modules/loss.py		patch \| blob \| history
torch/nn/modules/normalization.py		patch \| blob \| history
torch/nn/modules/pixelshuffle.py		patch \| blob \| history
torch/nn/modules/rnn.py		patch \| blob \| history
torch/nn/modules/sparse.py		patch \| blob \| history