From 54b72a99ef1d44eb1883c39f0be79ed41a789df4 Mon Sep 17 00:00:00 2001
From: Ilqar Ramazanli
Date: Fri, 10 Sep 2021 09:47:38 -0700
Subject: [PATCH] To add Rprop documentation (#63866)

Summary:
It has been discussed before that adding descriptions of the optimization algorithms to the PyTorch core documentation could serve as a nice optimization research tutorial. The tracking issue https://github.com/pytorch/pytorch/issues/63236 lists all the relevant algorithms with links to the originally published papers.

In this PR we add a description of Rprop to the documentation. For more details, we refer to the paper http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.1417

Pull Request resolved: https://github.com/pytorch/pytorch/pull/63866

Reviewed By: ngimel

Differential Revision: D30867590

Pulled By: iramazanli

fbshipit-source-id: 0d2d4ffc6c4d939290bbbaa84d2c6e901ed8b54a
---
 torch/optim/rprop.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/torch/optim/rprop.py b/torch/optim/rprop.py
index fb82fb3..741f6de 100644
--- a/torch/optim/rprop.py
+++ b/torch/optim/rprop.py
@@ -4,7 +4,39 @@ from .optimizer import Optimizer
 
 
 class Rprop(Optimizer):
-    """Implements the resilient backpropagation algorithm.
+    r"""Implements the resilient backpropagation algorithm.
+
+    .. math::
+       \begin{aligned}
+            &\rule{110mm}{0.4pt}                                                         \\
+            &\textbf{input} : \theta_0 \in \mathbf{R}^d \text{ (params)}, f(\theta)
+                \text{ (objective)},                                                     \\
+            &\hspace{13mm} \eta_{+/-} \text{ (etaplus, etaminus)}, \Gamma_{max/min}
+                \text{ (step sizes)}                                                     \\
+            &\textbf{initialize} : g^0_{prev} \leftarrow 0,
+                \: \eta_0 \leftarrow \text{lr (learning rate)}                           \\
+            &\rule{110mm}{0.4pt}                                                         \\
+            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                 \\
+            &\hspace{5mm}g_t \leftarrow \nabla_{\theta} f_t (\theta_{t-1})               \\
+            &\hspace{5mm} \textbf{for} \text{ } i = 0, 1, \ldots, d-1 \: \mathbf{do}     \\
+            &\hspace{10mm} \textbf{if} \: g^i_{prev} g^i_t > 0                           \\
+            &\hspace{15mm} \eta^i_t \leftarrow \mathrm{min}(\eta^i_{t-1} \eta_{+},
+                \Gamma_{max})                                                            \\
+            &\hspace{10mm} \textbf{else if} \: g^i_{prev} g^i_t < 0                      \\
+            &\hspace{15mm} \eta^i_t \leftarrow \mathrm{max}(\eta^i_{t-1} \eta_{-},
+                \Gamma_{min})                                                            \\
+            &\hspace{10mm} \textbf{else} \:                                              \\
+            &\hspace{15mm} \eta^i_t \leftarrow \eta^i_{t-1}                              \\
+            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1} - \eta_t \mathrm{sign}(g_t)    \\
+            &\hspace{5mm}g_{prev} \leftarrow g_t                                         \\
+            &\rule{110mm}{0.4pt}                                                  \\[-1.ex]
+            &\bf{return} \: \theta_t                                              \\[-1.ex]
+            &\rule{110mm}{0.4pt}                                                  \\[-1.ex]
+       \end{aligned}
+
+    For further details regarding the algorithm we refer to the paper
+    `A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm
+    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.1417>`_.
 
     Args:
         params (iterable): iterable of parameters to optimize or dicts defining
-- 
2.7.4
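
As a quick sanity check of the pseudocode added in this patch, below is a minimal, self-contained sketch of one Rprop update in plain PyTorch. The helper name `rprop_step` and its arguments are illustrative only and are not part of the `torch.optim` API; zeroing the gradient where its sign flips mirrors what the existing `torch/optim/rprop.py` step does, a detail the pseudocode leaves implicit.

```python
import torch

def rprop_step(param, grad, prev_grad, step_size,
               etaminus=0.5, etaplus=1.2,
               step_size_min=1e-6, step_size_max=50.0):
    """One elementwise Rprop update; returns the new (prev_grad, step_size) state."""
    sign = (grad * prev_grad).sign()               # +1 same sign, -1 flipped, 0 otherwise
    # Grow the step size where the gradient kept its sign, shrink it where it flipped.
    step_size = torch.where(sign > 0, step_size * etaplus, step_size)
    step_size = torch.where(sign < 0, step_size * etaminus, step_size)
    step_size = step_size.clamp(step_size_min, step_size_max)
    # Where the sign flipped, take no step for that element this iteration.
    grad = torch.where(sign < 0, torch.zeros_like(grad), grad)
    # Move against the sign of the gradient by the per-element step size.
    param -= grad.sign() * step_size
    return grad, step_size

# Toy run: minimize f(x) = (x - 3)^2 elementwise, starting from x = 0.
x = torch.zeros(4)
prev_grad = torch.zeros_like(x)
step = torch.full_like(x, 0.01)                    # eta_0 = lr
for _ in range(100):
    grad = 2.0 * (x - 3.0)                         # analytic gradient of (x - 3)^2
    prev_grad, step = rprop_step(x, grad, prev_grad, step)
print(x)                                           # all entries approximately 3.0
```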
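For completeness, the documented optimizer is used through the existing constructor; the `etas` and `step_sizes` values shown below are the defaults described in the docstring:

```python
import torch

# Small model and the Rprop optimizer from torch.optim.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Rprop(model.parameters(), lr=0.01,
                              etas=(0.5, 1.2), step_sizes=(1e-6, 50))

# One optimization step on a toy loss.
loss = model(torch.randn(8, 10)).pow(2).mean()
loss.backward()
optimizer.step()
optimizer.zero_grad()
```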