From 0144de68e6f98d5158f8c0f94cd31c9bb6f79db6 Mon Sep 17 00:00:00 2001 From: qipeng Date: Tue, 22 Jul 2014 21:17:19 -0700 Subject: [PATCH] improved numerical stability for AdaGrad --- src/caffe/solver.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 5632f24..abcbe5e 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -481,6 +481,7 @@ void NesterovSolver<Dtype>::ComputeUpdateValue() { vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); // get the learning rate Dtype rate = this->GetLearningRate(); + Dtype delta = this->param_.delta(); if (this->param_.display() && this->iter_ % this->param_.display() == 0) { LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; } @@ -594,10 +595,13 @@ void AdaGradSolver<Dtype>::ComputeUpdateValue() { // prepare update caffe_powx(net_params[param_id]->count(), - this->history_[param_id]->cpu_data(), Dtype(-0.5), + this->history_[param_id]->cpu_data(), Dtype(0.5), this->update_[param_id]->mutable_cpu_data()); - caffe_mul(net_params[param_id]->count(), + caffe_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_cpu_data()); + + caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(), this->update_[param_id]->mutable_cpu_data()); @@ -635,10 +639,13 @@ void AdaGradSolver<Dtype>::ComputeUpdateValue() { // prepare update caffe_gpu_powx(net_params[param_id]->count(), - this->history_[param_id]->gpu_data(), Dtype(-0.5), + this->history_[param_id]->gpu_data(), Dtype(0.5), this->update_[param_id]->mutable_gpu_data()); - caffe_gpu_mul(net_params[param_id]->count(), + caffe_gpu_add_scalar(net_params[param_id]->count(), + delta, this->update_[param_id]->mutable_gpu_data()); + + caffe_gpu_div(net_params[param_id]->count(), net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(), this->update_[param_id]->mutable_gpu_data()); -- 2.7.4