Clean up and modernize AdaDelta code; add learning rate support; add additional test cases

author Matthias Plappert <matthiasplappert@me.com>

Sat, 18 Jul 2015 16:46:51 +0000 (18:46 +0200)

committer Matthias Plappert <matthiasplappert@me.com>

Mon, 10 Aug 2015 09:44:13 +0000 (11:44 +0200)
author Matthias Plappert <matthiasplappert@me.com>
Sat, 18 Jul 2015 16:46:51 +0000 (18:46 +0200)
committer Matthias Plappert <matthiasplappert@me.com>
Mon, 10 Aug 2015 09:44:13 +0000 (11:44 +0200)
diff --git a/examples/mnist/lenet_adadelta_solver.prototxt b/examples/mnist/lenet_adadelta_solver.prototxt

index b77b451..776d1e0 100644 (file)
--- a/examples/mnist/lenet_adadelta_solver.prototxt
+++ b/examples/mnist/lenet_adadelta_solver.prototxt
@@ -7,6 +7,8 @@ test_iter: 100
  # Carry out testing every 500 training iterations.
  test_interval: 500
  # The base learning rate, momentum and the weight decay of the network.
+base_lr: 1.0
+lr_policy: "fixed"
  momentum: 0.95
  weight_decay: 0.0005
  # Display every 100 iterations
diff --git a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt

index 4e43468..065647d 100644 (file)
--- a/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt
+++ b/examples/mnist/mnist_autoencoder_solver_adadelta.prototxt
@@ -5,6 +5,8 @@ test_state: { stage: 'test-on-test' }
  test_iter: 100
  test_interval: 500
  test_compute_loss: true
+base_lr: 1.0
+lr_policy: "fixed"
  momentum: 0.95
  delta: 1e-8
  display: 100
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp

index 495cd4f..5fefd01 100644 (file)
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -82,12 +82,12 @@ class SGDSolver : public Solver<Dtype> {
    const vector<shared_ptr<Blob<Dtype> > >& history() { return history_; }
  
   protected:
+  void PreSolve();
    Dtype GetLearningRate();
    virtual void ApplyUpdate();
    virtual void Normalize(int param_id);
    virtual void Regularize(int param_id);
    virtual void ComputeUpdateValue(int param_id, Dtype rate);
-  virtual void PreSolve();
    virtual void ClipGradients();
    virtual void SnapshotSolverState(const string& model_filename);
    virtual void SnapshotSolverStateToBinaryProto(const string& model_filename);
@@ -162,19 +162,13 @@ template <typename Dtype>
  class AdaDeltaSolver : public SGDSolver<Dtype> {
   public:
    explicit AdaDeltaSolver(const SolverParameter& param)
-      : SGDSolver<Dtype>(param) { PreSolve(); constructor_sanity_check(); }
+      : SGDSolver<Dtype>(param) { AdaDeltaPreSolve(); }
    explicit AdaDeltaSolver(const string& param_file)
-      : SGDSolver<Dtype>(param_file) { PreSolve(); constructor_sanity_check(); }
+      : SGDSolver<Dtype>(param_file) { AdaDeltaPreSolve(); }
  
   protected:
-  virtual void PreSolve();
-  virtual void ComputeUpdateValue();
-  void constructor_sanity_check() {
-    CHECK_EQ(0, this->param_.base_lr())
-        << "Learning rate cannot be used with AdaDelta.";
-    CHECK_EQ("", this->param_.lr_policy())
-        << "Learning rate policy cannot be applied to AdaDelta.";
-  }
+  void AdaDeltaPreSolve();
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
  
    DISABLE_COPY_AND_ASSIGN(AdaDeltaSolver);
  };
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp

index 34a290f..78902ca 100644 (file)
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -935,10 +935,10 @@ void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  }
  
  template <typename Dtype>
-void AdaDeltaSolver<Dtype>::PreSolve() {
+void AdaDeltaSolver<Dtype>::AdaDeltaPreSolve() {
    // Add the extra history entries for AdaDelta after those from
    // SGDSolver::PreSolve
-  const vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
    for (int i = 0; i < net_params.size(); ++i) {
          const vector<int>& shape = net_params[i]->shape();
          this->history_.push_back(
@@ -947,172 +947,134 @@ void AdaDeltaSolver<Dtype>::PreSolve() {
  }
  
  template <typename Dtype>
-void AdaDeltaSolver<Dtype>::ComputeUpdateValue() {
-  const vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
-  const vector<float>& net_params_weight_decay =
-          this->net_->params_weight_decay();
+void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
+  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
+  const vector<float>& net_params_lr = this->net_->params_lr();
    Dtype delta = this->param_.delta();
    Dtype momentum = this->param_.momentum();
-  Dtype weight_decay = this->param_.weight_decay();
-  string regularization_type = this->param_.regularization_type();
+  Dtype local_rate = rate * net_params_lr[param_id];
    size_t update_history_offset = net_params.size();
    switch (Caffe::mode()) {
-  case Caffe::CPU:
-    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
-      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
-
-      if (local_decay) {
-        if (regularization_type == "L2") {
-          // add weight decay
-          caffe_axpy(net_params[param_id]->count(),
-              local_decay,
-              net_params[param_id]->cpu_data(),
-              net_params[param_id]->mutable_cpu_diff());
-        } else if (regularization_type == "L1") {
-          caffe_cpu_sign(net_params[param_id]->count(),
-              net_params[param_id]->cpu_data(),
-              this->temp_[param_id]->mutable_cpu_data());
-          caffe_axpy(net_params[param_id]->count(),
-              local_decay,
-              this->temp_[param_id]->cpu_data(),
-              net_params[param_id]->mutable_cpu_diff());
-        } else {
-          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
-        }
-      }
+  case Caffe::CPU: {
+    // compute square of gradient in update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
  
-      // compute square of gradient in update
-      caffe_powx(net_params[param_id]->count(),
-          net_params[param_id]->cpu_diff(), Dtype(2),
-          this->update_[param_id]->mutable_cpu_data());
-
-      // update history of gradients
-      caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-          this->update_[param_id]->cpu_data(), momentum,
-          this->history_[param_id]->mutable_cpu_data());
-
-      // add delta to history to guard against dividing by zero later
-      caffe_set(net_params[param_id]->count(), delta,
-          this->temp_[param_id]->mutable_cpu_data());
-
-      caffe_add(net_params[param_id]->count(),
-          this->temp_[param_id]->cpu_data(),
-          this->history_[update_history_offset + param_id]->cpu_data(),
-          this->update_[param_id]->mutable_cpu_data());
-
-      caffe_add(net_params[param_id]->count(),
-          this->temp_[param_id]->cpu_data(),
-          this->history_[param_id]->cpu_data(),
-          this->temp_[param_id]->mutable_cpu_data());
-
-      // divide history of updates by history of gradients
-      caffe_div(net_params[param_id]->count(),
-          this->update_[param_id]->cpu_data(),
-          this->temp_[param_id]->cpu_data(),
-          this->update_[param_id]->mutable_cpu_data());
-
-      // jointly compute the RMS of both for update and gradient history
-      caffe_powx(net_params[param_id]->count(),
-          this->update_[param_id]->cpu_data(), Dtype(0.5),
-          this->update_[param_id]->mutable_cpu_data());
-
-      // compute the update
-      caffe_mul(net_params[param_id]->count(),
-          net_params[param_id]->cpu_diff(),
-          this->update_[param_id]->cpu_data(),
-          net_params[param_id]->mutable_cpu_diff());
-
-      // compute square of update
-      caffe_powx(net_params[param_id]->count(),
-          net_params[param_id]->cpu_diff(), Dtype(2),
-          this->update_[param_id]->mutable_cpu_data());
-
-      // update history of updates
-      caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-          this->update_[param_id]->cpu_data(), momentum,
-          this->history_[update_history_offset + param_id]->mutable_cpu_data());
-    }
+    // update history of gradients
+    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->cpu_data(), momentum,
+        this->history_[param_id]->mutable_cpu_data());
+
+    // add delta to history to guard against dividing by zero later
+    caffe_set(net_params[param_id]->count(), delta,
+        this->temp_[param_id]->mutable_cpu_data());
+
+    caffe_add(net_params[param_id]->count(),
+        this->temp_[param_id]->cpu_data(),
+        this->history_[update_history_offset + param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    caffe_add(net_params[param_id]->count(),
+        this->temp_[param_id]->cpu_data(),
+        this->history_[param_id]->cpu_data(),
+        this->temp_[param_id]->mutable_cpu_data());
+
+    // divide history of updates by history of gradients
+    caffe_div(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(),
+        this->temp_[param_id]->cpu_data(),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // jointly compute the RMS of both for update and gradient history
+    caffe_powx(net_params[param_id]->count(),
+        this->update_[param_id]->cpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // compute the update
+    caffe_mul(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(),
+        this->update_[param_id]->cpu_data(),
+        net_params[param_id]->mutable_cpu_diff());
+
+    // compute square of update
+    caffe_powx(net_params[param_id]->count(),
+        net_params[param_id]->cpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_cpu_data());
+
+    // update history of updates
+    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->cpu_data(), momentum,
+        this->history_[update_history_offset + param_id]->mutable_cpu_data());
+
+    // apply learning rate
+    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
+        net_params[param_id]->cpu_diff(),
+        net_params[param_id]->mutable_cpu_diff());
      break;
-  case Caffe::GPU:
+  }
+  case Caffe::GPU: {
  #ifndef CPU_ONLY
-    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
-      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
-
-      if (local_decay) {
-        if (regularization_type == "L2") {
-          // add weight decay
-          caffe_gpu_axpy(net_params[param_id]->count(),
-              local_decay,
-              net_params[param_id]->gpu_data(),
-              net_params[param_id]->mutable_gpu_diff());
-        } else if (regularization_type == "L1") {
-          caffe_gpu_sign(net_params[param_id]->count(),
-              net_params[param_id]->gpu_data(),
-              this->temp_[param_id]->mutable_gpu_data());
-          caffe_gpu_axpy(net_params[param_id]->count(),
-              local_decay,
-              this->temp_[param_id]->gpu_data(),
-              net_params[param_id]->mutable_gpu_diff());
-        } else {
-          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
-        }
-      }
+    // compute square of gradient in update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
  
-      // compute square of gradient in update
-      caffe_gpu_powx(net_params[param_id]->count(),
-          net_params[param_id]->gpu_diff(), Dtype(2),
-          this->update_[param_id]->mutable_gpu_data());
-
-      // update history of gradients
-      caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-          this->update_[param_id]->gpu_data(), momentum,
-          this->history_[param_id]->mutable_gpu_data());
-
-      // add delta to history to guard against dividing by zero later
-      caffe_gpu_set(net_params[param_id]->count(), delta,
-          this->temp_[param_id]->mutable_gpu_data());
-
-      caffe_gpu_add(net_params[param_id]->count(),
-          this->temp_[param_id]->gpu_data(),
-          this->history_[update_history_offset + param_id]->gpu_data(),
-          this->update_[param_id]->mutable_gpu_data());
-
-      caffe_gpu_add(net_params[param_id]->count(),
-          this->temp_[param_id]->gpu_data(),
-          this->history_[param_id]->gpu_data(),
-          this->temp_[param_id]->mutable_gpu_data());
-
-      // divide history of updates by history of gradients
-      caffe_gpu_div(net_params[param_id]->count(),
-          this->update_[param_id]->gpu_data(),
-          this->temp_[param_id]->gpu_data(),
-          this->update_[param_id]->mutable_gpu_data());
-
-      // jointly compute the RMS of both for update and gradient history
-      caffe_gpu_powx(net_params[param_id]->count(),
-          this->update_[param_id]->gpu_data(), Dtype(0.5),
-          this->update_[param_id]->mutable_gpu_data());
-
-      // compute the update and copy to net_diff
-      caffe_gpu_mul(net_params[param_id]->count(),
-          net_params[param_id]->gpu_diff(),
-          this->update_[param_id]->gpu_data(),
-          net_params[param_id]->mutable_gpu_diff());
-
-      // compute square of update
-      caffe_gpu_powx(net_params[param_id]->count(),
-          net_params[param_id]->gpu_diff(), Dtype(2),
-          this->update_[param_id]->mutable_gpu_data());
-
-      // update history of updates
-      caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
-          this->update_[param_id]->gpu_data(), momentum,
-          this->history_[update_history_offset + param_id]->mutable_gpu_data());
-    }
+    // update history of gradients
+    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->gpu_data(), momentum,
+        this->history_[param_id]->mutable_gpu_data());
+
+    // add delta to history to guard against dividing by zero later
+    caffe_gpu_set(net_params[param_id]->count(), delta,
+        this->temp_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add(net_params[param_id]->count(),
+        this->temp_[param_id]->gpu_data(),
+        this->history_[update_history_offset + param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    caffe_gpu_add(net_params[param_id]->count(),
+        this->temp_[param_id]->gpu_data(),
+        this->history_[param_id]->gpu_data(),
+        this->temp_[param_id]->mutable_gpu_data());
+
+    // divide history of updates by history of gradients
+    caffe_gpu_div(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(),
+        this->temp_[param_id]->gpu_data(),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // jointly compute the RMS of both for update and gradient history
+    caffe_gpu_powx(net_params[param_id]->count(),
+        this->update_[param_id]->gpu_data(), Dtype(0.5),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // compute the update and copy to net_diff
+    caffe_gpu_mul(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(),
+        this->update_[param_id]->gpu_data(),
+        net_params[param_id]->mutable_gpu_diff());
+
+    // compute square of update
+    caffe_gpu_powx(net_params[param_id]->count(),
+        net_params[param_id]->gpu_diff(), Dtype(2),
+        this->update_[param_id]->mutable_gpu_data());
+
+    // update history of updates
+    caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
+        this->update_[param_id]->gpu_data(), momentum,
+        this->history_[update_history_offset + param_id]->mutable_gpu_data());
+
+    // apply learning rate
+    caffe_gpu_scale(net_params[param_id]->count(), local_rate,
+        net_params[param_id]->gpu_diff(),
+        net_params[param_id]->mutable_gpu_diff());
  #else
      NO_GPU;
  #endif
      break;
+  }
    default:
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
    }
diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp

index 277aa3a..c97d4ed 100644 (file)
--- a/src/caffe/test/test_gradient_based_solver.cpp
+++ b/src/caffe/test/test_gradient_based_solver.cpp
@@ -165,10 +165,6 @@ class GradientBasedSolverTest : public MultiDeviceTest<TypeParam> {
         "    bottom: 'targets' "
         "  } "
         "} ";
-    if (learning_rate != 0) {
-      proto << "base_lr: " << learning_rate << " ";
-      proto << "lr_policy: 'fixed' ";
-    }
      if (weight_decay != 0) {
        proto << "weight_decay: " << weight_decay << " ";
      }
@@ -898,6 +894,139 @@ TYPED_TEST(NesterovSolverTest, TestSnapshotShare) {
  }
  
  template <typename TypeParam>
+class AdaDeltaSolverTest : public GradientBasedSolverTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  virtual void InitSolver(const SolverParameter& param) {
+    this->solver_.reset(new AdaDeltaSolver<Dtype>(param));
+  }
+
+  virtual SolverParameter_SolverType solver_type() {
+    return SolverParameter_SolverType_ADADELTA;
+  }
+};
+
+TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices);
+
+TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  this->TestLeastSquaresUpdate(kLearningRate);
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.5;
+  const Dtype kMomentum = 0.95;
+  this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.0;
+  const Dtype kMomentum = 0.5;
+  const int kNumIters = 1;
+  for (int i = 0; i <= kNumIters; ++i) {
+      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.0;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 1;
+  for (int i = 0; i <= kNumIters; ++i) {
+      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.0;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  for (int i = 0; i <= kNumIters; ++i) {
+      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  for (int i = 0; i <= kNumIters; ++i) {
+      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest,
+           TestAdaDeltaLeastSquaresUpdateWithEverythingShare) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  this->share_ = true;
+  for (int i = 0; i <= kNumIters; ++i) {
+      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccum) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  const int kIterSize = 2;
+  this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+      kIterSize);
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithEverythingAccumShare) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  const int kIterSize = 2;
+  this->share_ = true;
+  this->CheckAccumulation(kLearningRate, kWeightDecay, kMomentum, kNumIters,
+      kIterSize);
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestSnapshot) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  for (int i = 1; i <= kNumIters; ++i) {
+    this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
+  }
+}
+
+TYPED_TEST(AdaDeltaSolverTest, TestSnapshotShare) {
+  typedef typename TypeParam::Dtype Dtype;
+  const Dtype kLearningRate = 1.0;
+  const Dtype kWeightDecay = 0.1;
+  const Dtype kMomentum = 0.95;
+  const int kNumIters = 4;
+  this->share_ = true;
+  for (int i = 1; i <= kNumIters; ++i) {
+    this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
+  }
+}
+
+template <typename TypeParam>
  class RMSPropSolverTest : public GradientBasedSolverTest<TypeParam> {
    typedef typename TypeParam::Dtype Dtype;
  
@@ -1003,78 +1132,4 @@ TYPED_TEST(RMSPropSolverTest, TestSnapshotShare) {
    }
  }
  
-template <typename TypeParam>
-class AdaDeltaSolverTest : public GradientBasedSolverTest<TypeParam> {
-  typedef typename TypeParam::Dtype Dtype;
-
- protected:
-  virtual void InitSolver(const SolverParameter& param) {
-    this->solver_.reset(new AdaDeltaSolver<Dtype>(param));
-  }
-
-  virtual SolverParameter_SolverType solver_type() {
-    return SolverParameter_SolverType_ADADELTA;
-  }
-};
-
-TYPED_TEST_CASE(AdaDeltaSolverTest, TestDtypesAndDevices);
-
-TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdate) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  this->TestLeastSquaresUpdate(kLearningRate);
-}
-
-TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithWeightDecay) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  const Dtype kWeightDecay = 0.5;
-  const Dtype kMomentum = 0.95;
-  this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
-}
-
-TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithHalfMomentum) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  const Dtype kWeightDecay = 0.0;
-  const Dtype kMomentum = 0.5;
-  const int kNumIters = 1;
-  for (int i = 0; i <= kNumIters; ++i) {
-      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
-  }
-}
-
-TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithMomentum) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  const Dtype kWeightDecay = 0.0;
-  const Dtype kMomentum = 0.95;
-  const int kNumIters = 1;
-  for (int i = 0; i <= kNumIters; ++i) {
-      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum);
-  }
-}
-
-TYPED_TEST(AdaDeltaSolverTest, TestLeastSquaresUpdateWithMomentumMultiIter) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  const Dtype kWeightDecay = 0.0;
-  const Dtype kMomentum = 0.95;
-  const int kNumIters = 4;
-  for (int i = 0; i <= kNumIters; ++i) {
-      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
-  }
-}
-
-TYPED_TEST(AdaDeltaSolverTest, TestAdaDeltaLeastSquaresUpdateWithEverything) {
-  typedef typename TypeParam::Dtype Dtype;
-  const Dtype kLearningRate = 0.0;
-  const Dtype kWeightDecay = 0.1;
-  const Dtype kMomentum = 0.95;
-  const int kNumIters = 4;
-  for (int i = 0; i <= kNumIters; ++i) {
-      this->TestLeastSquaresUpdate(kLearningRate, kWeightDecay, kMomentum, i);
-  }
-}
-
  }  // namespace caffe
author	Matthias Plappert <matthiasplappert@me.com>
	Sat, 18 Jul 2015 16:46:51 +0000 (18:46 +0200)
committer	Matthias Plappert <matthiasplappert@me.com>
	Mon, 10 Aug 2015 09:44:13 +0000 (11:44 +0200)
examples/mnist/lenet_adadelta_solver.prototxt		patch \| blob \| history
examples/mnist/mnist_autoencoder_solver_adadelta.prototxt		patch \| blob \| history
include/caffe/solver.hpp		patch \| blob \| history
src/caffe/solver.cpp		patch \| blob \| history
src/caffe/test/test_gradient_based_solver.cpp		patch \| blob \| history