From 8c3285bf114c6a1f0cdb6c2ffad4d7c45ad85d5e Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Wed, 27 Mar 2019 10:15:20 -0700 Subject: [PATCH] Fix loss functions doc (#18420) Summary: Correct docstring display error on web page caused by my previous PR Pull Request resolved: https://github.com/pytorch/pytorch/pull/18420 Differential Revision: D14642467 Pulled By: soumith fbshipit-source-id: 16fdd3301a4c5bad27fbcd8686f7fbfcc1e908ee --- torch/nn/modules/conv.py | 27 ++++++++++++++------------- torch/nn/modules/loss.py | 28 ++++++++++++++-------------- torch/nn/modules/upsampling.py | 9 ++++----- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/torch/nn/modules/conv.py b/torch/nn/modules/conv.py index cc5ca78..12ba9ff 100644 --- a/torch/nn/modules/conv.py +++ b/torch/nn/modules/conv.py @@ -289,8 +289,8 @@ class Conv2d(_ConvNd): Attributes: weight (Tensor): the learnable weights of the module of shape - :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}}, - \text{kernel\_size[0]}, \text{kernel\_size[1]})`. + :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},` + :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` @@ -393,9 +393,9 @@ class Conv3d(_ConvNd): where `K` is a positive integer, this operation is also termed in literature as depthwise convolution. - In other words, for an input of size :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, - a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments - :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. + In other words, for an input of size :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, + a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments + :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`. .. include:: cudnn_deterministic.rst @@ -428,8 +428,8 @@ class Conv3d(_ConvNd): Attributes: weight (Tensor): the learnable weights of the module of shape - :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}}, - \text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`. + :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},` + :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}` @@ -613,8 +613,9 @@ class ConvTranspose1d(_ConvTransposeMixin, _ConvNd): Attributes: weight (Tensor): the learnable weights of the module of shape - :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}}, - \text{kernel\_size})`. The values of these weights are sampled from + :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},` + :math:`\text{kernel\_size})`. + The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}` bias (Tensor): the learnable bias of the module of shape (out_channels). @@ -735,8 +736,8 @@ class ConvTranspose2d(_ConvTransposeMixin, _ConvNd): Attributes: weight (Tensor): the learnable weights of the module of shape - :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}}, - \text{kernel\_size[0]}, \text{kernel\_size[1]})`. + :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},` + :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` @@ -890,8 +891,8 @@ class ConvTranspose3d(_ConvTransposeMixin, _ConvNd): Attributes: weight (Tensor): the learnable weights of the module of shape - :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}}, - \text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`. + :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},` + :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}` diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py index 3e5f087..2cb4069 100644 --- a/torch/nn/modules/loss.py +++ b/torch/nn/modules/loss.py @@ -356,8 +356,8 @@ class KLDivLoss(_Loss): and in the meantime, specifying either of those two args will override :attr:`reduction`. .. note:: - :attr:``reduction`` = ``'mean'`` doesn't return the true kl divergence value, please use - :attr:``reduction`` = ``'batchmean'`` which aligns with KL math definition. + :attr:`reduction` = ``'mean'`` doesn't return the true kl divergence value, please use + :attr:`reduction` = ``'batchmean'`` which aligns with KL math definition. In the next major release, ``'mean'`` will be changed to be the same as ``'batchmean'``. Shape: @@ -655,7 +655,7 @@ class HingeEmbeddingLoss(_Loss): - Input: :math:`(*)` where :math:`*` means, any number of dimensions. The sum operation operates over all the elements. - Target: :math:`(*)`, same shape as the input - - Output: scalar. If :attr:``reduction`` is ``'none'``, then same shape as the input + - Output: scalar. If :attr:`reduction` is ``'none'``, then same shape as the input """ __constants__ = ['margin', 'reduction'] @@ -711,7 +711,7 @@ class MultiLabelMarginLoss(_Loss): - Input: :math:`(C)` or :math:`(N, C)` where `N` is the batch size and `C` is the number of classes. - Target: :math:`(C)` or :math:`(N, C)`, label targets padded by -1 ensuring same shape as the input. - - Output: scalar. If :attr:``reduction`` is ``'none'``, then :math:`(N)`. + - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N)`. Examples:: @@ -739,7 +739,7 @@ class SmoothL1Loss(_Loss): r"""Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term otherwise. It is less sensitive to outliers than the `MSELoss` and in some cases - prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick). + prevents exploding gradients (e.g. see `Fast R-CNN` paper by Ross Girshick). Also known as the Huber loss: .. math:: @@ -853,7 +853,7 @@ class CrossEntropyLoss(_WeightedLoss): with :math:`K \geq 1` for the `K`-dimensional case (described later). This criterion expects a class index in the range :math:`[0, C-1]` as the - `target`for each value of a 1D tensor of size `minibatch`; if `ignore_index` + `target` for each value of a 1D tensor of size `minibatch`; if `ignore_index` is specified, this criterion also accepts this class index (this index may not necessarily be in the class range). @@ -1000,8 +1000,8 @@ class CosineEmbeddingLoss(_Loss): Args: margin (float, optional): Should be a number from :math:`-1` to :math:`1`, - :math:`0` to :math:`0.5` is suggested. If :attr:`margin` is missing, the - default value is :math:`0`. + :math:`0` to :math:`0.5` is suggested. If :attr:`margin` is missing, the + default value is :math:`0`. size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged over each loss element in the batch. Note that for some losses, there are multiple elements per sample. If the field :attr:`size_average` @@ -1032,7 +1032,7 @@ class CosineEmbeddingLoss(_Loss): @weak_module class MarginRankingLoss(_Loss): r"""Creates a criterion that measures the loss given - inputs :math:`x1`, :math:`x2`, two 1D mini-batch `Tensor`s, + inputs :math:`x1`, :math:`x2`, two 1D mini-batch `Tensors`, and a label 1D mini-batch tensor :math:`y` (containing 1 or -1). If :math:`y = 1` then it assumed the first input should be ranked higher @@ -1146,8 +1146,8 @@ class TripletMarginLoss(_Loss): r"""Creates a criterion that measures the triplet loss given an input tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`. This is used for measuring a relative similarity between samples. A triplet - is composed by `a`, `p` and `n`: `anchor`, `positive examples` and `negative - examples` respectively. The shapes of all input tensors should be + is composed by `a`, `p` and `n` (i.e., `anchor`, `positive examples` and `negative + examples` respectively). The shapes of all input tensors should be :math:`(N, D)`. The distance swap is described in detail in the paper `Learning shallow @@ -1224,9 +1224,9 @@ class CTCLoss(_Loss): Args: blank (int, optional): blank label. Default :math:`0`. reduction (string, optional): Specifies the reduction to apply to the output: - 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, - 'mean': the output losses will be divided by the target lengths and - then the mean over the batch is taken. Default: 'mean' + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: ``'mean'`` zero_infinity (bool, optional): Whether to zero infinite losses and the associated gradients. Default: ``False`` diff --git a/torch/nn/modules/upsampling.py b/torch/nn/modules/upsampling.py index 261edcb..b41ad73 100644 --- a/torch/nn/modules/upsampling.py +++ b/torch/nn/modules/upsampling.py @@ -22,11 +22,10 @@ class Upsample(Module): calculate the output size. (You cannot give both, as it is ambiguous) Args: - size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int], - optional): output spatial sizes - scale_factor (float or Tuple[float] or Tuple[float, float] or - Tuple[float, float, float], optional): multiplier for spatial size. - Has to match input size if it is a tuple. + size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int], optional): + output spatial sizes + scale_factor (float or Tuple[float] or Tuple[float, float] or Tuple[float, float, float], optional): + multiplier for spatial size. Has to match input size if it is a tuple. mode (str, optional): the upsampling algorithm: one of ``'nearest'``, ``'linear'``, ``'bilinear'``, ``'bicubic'`` and ``'trilinear'``. Default: ``'nearest'`` -- 2.7.4