}
string RunLeastSquaresSolver(const Dtype learning_rate,
- const Dtype weight_decay, const Dtype momentum, const Dtype rms_decay,
- const int num_iters, const int iter_size = 1, const bool snapshot = false,
+ const Dtype weight_decay, const Dtype momentum, const int num_iters,
+ const int iter_size = 1, const bool snapshot = false,
const char* from_snapshot = NULL) {
ostringstream proto;
proto <<
if (momentum != 0) {
proto << "momentum: " << momentum << " ";
}
- if (rms_decay != 0) {
- proto << "rms_decay: " << rms_decay << " ";
- }
MakeTempDir(&snapshot_prefix_);
proto << "snapshot_prefix: '" << snapshot_prefix_ << "/' ";
if (snapshot) {
// updated_params will store the updated weight and bias results,
// using the blobs' diffs to hold the update values themselves.
void ComputeLeastSquaresUpdate(const Dtype learning_rate,
- const Dtype weight_decay, const Dtype momentum, const Dtype rms_decay,
+ const Dtype weight_decay, const Dtype momentum,
vector<shared_ptr<Blob<Dtype> > >* updated_params) {
const int N = num_;
const int D = channels_ * height_ * width_;
case SolverParameter_SolverType_ADAGRAD:
update_value /= std::sqrt(history_value + grad * grad) + delta_;
break;
- case SolverParameter_SolverType_RMSPROP:
+ case SolverParameter_SolverType_RMSPROP: {
+ const Dtype rms_decay = 0.95;
update_value /= std::sqrt(rms_decay*history_value
+ grad * grad * (1 - rms_decay)) + delta_;
+ }
break;
default:
LOG(FATAL) << "Unknown solver type: " << solver_type();
}
void CheckAccumulation(const Dtype kLearningRate, const Dtype kWeightDecay,
- const Dtype kMomentum, const Dtype rms_decay, const int kNumIters,
- const int kIterSize) {
+ const Dtype kMomentum, const int kNumIters, const int kIterSize) {
const double kPrecision = 1e-2;
const double kMinPrecision = 1e-7;
constant_data_ = true;
// Solve without accumulation and save parameters.
this->RunLeastSquaresSolver(kLearningRate, kWeightDecay, kMomentum,
- rms_decay, kNumIters);
+ kNumIters);
// Save parameters for comparison.
Net<Dtype>& net = *this->solver_->net();
const vector<shared_ptr<Blob<Dtype> > >& param_blobs =
}
// Solve by equivalent accumulation of gradients over divided batches.
this->RunLeastSquaresSolver(kLearningRate, kWeightDecay, kMomentum,
- rms_decay, kNumIters, kIterSize);
+ kNumIters, kIterSize);
Net<Dtype>& net_accum = *this->solver_->net();
const vector<shared_ptr<Blob<Dtype> > >& accum_params =
net_accum.layer_by_name("innerprod")->blobs();
// matches the solver's (K+1)th update.
void TestLeastSquaresUpdate(const Dtype learning_rate = 1.0,
const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
- const Dtype rms_decay = 0.0, const int iter_to_check = 0) {
+ const int iter_to_check = 0) {
// Initialize the solver and run K (= iter_to_check) solver iterations.
- RunLeastSquaresSolver(learning_rate, weight_decay, momentum, rms_decay,
- iter_to_check);
+ RunLeastSquaresSolver(learning_rate, weight_decay, momentum, iter_to_check);
// Compute the (K+1)th update using the analytic least squares gradient.
vector<shared_ptr<Blob<Dtype> > > updated_params;
ComputeLeastSquaresUpdate(learning_rate, weight_decay, momentum,
- rms_decay, &updated_params);
+ &updated_params);
// Reinitialize the solver and run K+1 solver iterations.
- RunLeastSquaresSolver(learning_rate, weight_decay, momentum, rms_decay,
+ RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
iter_to_check + 1);
// Check that the solver's solution matches ours.
void TestSnapshot(const Dtype learning_rate = 1.0,
const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
- const Dtype rms_decay = 0.0, const int num_iters = 1) {
+ const int num_iters = 1) {
// Run the solver for num_iters * 2 iterations.
const int total_num_iters = num_iters * 2;
bool snapshot = false;
const int kIterSize = 1;
- RunLeastSquaresSolver(learning_rate, weight_decay, momentum, rms_decay,
- total_num_iters, kIterSize, snapshot);
+ RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
+ total_num_iters, kIterSize, snapshot);
// Save the resulting param values.
vector<shared_ptr<Blob<Dtype> > > param_copies;
// Run the solver for num_iters iterations and snapshot.
snapshot = true;
string snapshot_name = RunLeastSquaresSolver(learning_rate, weight_decay,
- momentum, rms_decay, num_iters, kIterSize, snapshot);
+ momentum, num_iters, kIterSize, snapshot);
// Reinitialize the solver and run for num_iters more iterations.
snapshot = false;
- RunLeastSquaresSolver(learning_rate, weight_decay, momentum, rms_decay,
+ RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
total_num_iters, kIterSize, snapshot, snapshot_name.c_str());
// Check that params now match.
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 1;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(SGDSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
this->share_ = true;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(SGDSolverTest, TestSnapshot) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(AdaGradSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
this->share_ = true;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(AdaGradSolverTest, TestSnapshot) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 1;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0;
const Dtype kMomentum = 0.5;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(NesterovSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
const int kIterSize = 2;
this->share_ = true;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(NesterovSolverTest, TestSnapshot) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.9;
- const Dtype kRMSDecay = 0;
const int kNumIters = 4;
this->share_ = true;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
protected:
virtual void InitSolver(const SolverParameter& param) {
- this->solver_.reset(new RMSPropSolver<Dtype>(param));
+ const Dtype rms_decay = 0.95;
+ SolverParameter new_param = param;
+ new_param.set_rms_decay(rms_decay);
+ this->solver_.reset(new RMSPropSolver<Dtype>(new_param));
}
virtual SolverParameter_SolverType solver_type() {
return SolverParameter_SolverType_RMSPROP;
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.0;
const Dtype kMomentum = 0.0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
this->share_ = true;
for (int i = 0; i <= kNumIters; ++i) {
- this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum,
- kRMSDecay, i);
+ this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
const int kIterSize = 2;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(RMSPropSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0.0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
const int kIterSize = 2;
this->share_ = true;
- this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kRMSDecay,
- kNumIters, kIterSize);
+ this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+ kIterSize);
}
TYPED_TEST(RMSPropSolverTest, TestSnapshot) {
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}
const Dtype kLearningRate = 0.01;
const Dtype kWeightDecay = 0.5;
const Dtype kMomentum = 0;
- const Dtype kRMSDecay = 0.95;
const int kNumIters = 4;
this->share_ = true;
for (int i = 1; i <= kNumIters; ++i) {
- this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, kRMSDecay, i);
+ this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
}
}