blob math preparation

author Yangqing Jia <jiayq84@gmail.com>

Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)

committer Yangqing Jia <jiayq84@gmail.com>

Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)
author Yangqing Jia <jiayq84@gmail.com>
Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)
diff --git a/src/caffe/common.hpp b/src/caffe/common.hpp

index c28ad57..39e417f 100644 (file)
--- a/src/caffe/common.hpp
+++ b/src/caffe/common.hpp
@@ -43,6 +43,10 @@ private:\
  
  namespace caffe {
  
+// Two empty classes whose sole purpose is to instantiate the blob template
+// functions.
+class GPUBrewer {};
+class CPUBrewer {};
  
  // We will use the boost shared_ptr instead of the new C++11 one mainly
  // because cuda does not work (at least now) well with C++11 features.
diff --git a/src/caffe/optimization/solver.cpp b/src/caffe/optimization/solver.cpp

index 500f32a..1afe293 100644 (file)
--- a/src/caffe/optimization/solver.cpp
+++ b/src/caffe/optimization/solver.cpp
@@ -34,7 +34,7 @@ void Solver<Dtype>::Solve(Net<Dtype>* net) {
      if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
        Snapshot(false);
      }
-    if (param_.display()) {
+    if (param_.display() && iter_ % param_.display() == 0) {  // every N iters
        LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
      }
    }
@@ -60,12 +60,24 @@ void Solver<Dtype>::Snapshot(bool is_final) {
  }
  
  
+// Return the current learning rate. The currently implemented learning rate
+// policies are as follows:
+//    - fixed: always return base_lr.
+//    - step: return base_lr * gamma ^ (floor(iter / stepsize))
+//    - exp: return base_lr * gamma ^ iter
+//    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
+// where base_lr, gamma, stepsize and power are defined in the solver parameter
+// protocol buffer, and iter is the current iteration.
  template <typename Dtype>
  Dtype SGDSolver<Dtype>::GetLearningRate() {
    Dtype rate;
    const string& lr_policy = this->param_.lr_policy();
    if (lr_policy == "fixed") {
      rate = this->param_.base_lr();
+  } else if (lr_policy == "step") {
+    int current_step = this->iter_ / this->param_.stepsize();
+    rate = this->param_.base_lr() *
+        pow(this->param_.gamma(), current_step);
    } else if (lr_policy == "exp") {
      rate = this->param_.base_lr() * pow(this->param_.gamma(), this->iter_);
    } else if (lr_policy == "inv") {
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto

index 3961a42..048144c 100644 (file)
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -85,7 +85,9 @@ message NetParameter {
  
  message SolverParameter {
    optional float base_lr = 1; // The base learning rate
-  optional int32 display = 2; // display options. 0 = no display
+  // the number of iterations between displaying info. If display = 0, no info
+  // will be displayed.
+  optional int32 display = 2;
    optional int32 max_iter = 3; // the maximum number of iterations
    optional int32 snapshot = 4 [default = 0]; // The snapshot interval
    optional string lr_policy = 5; // The learning rate decay policy.
@@ -95,6 +97,7 @@ message SolverParameter {
    optional float power = 9; // The parameter to compute the learning rate.
    optional float momentum = 10; // The momentum value.
    optional float weight_decay = 11; // The weight decay.
+  optional float stepsize = 12; // the stepsize for learning rate policy "step"
  
-  optional string snapshot_prefix = 12; // The prefix for the snapshot.
+  optional string snapshot_prefix = 13; // The prefix for the snapshot.
  }
diff --git a/src/caffe/util/blob_math.cpp.working b/src/caffe/util/blob_math.cpp.working

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/src/caffe/util/blob_math.hpp b/src/caffe/util/blob_math.hpp

new file mode 100644 (file)

index 0000000..414d6eb
--- /dev/null
+++ b/src/caffe/util/blob_math.hpp
@@ -0,0 +1,104 @@
+// Copyright Yangqing Jia 2013
+//
+// This is a working version of the math functions that will hopefully replace
+// the separate cpu and gpu versions, and eventually the old math_functions
+// wrapper.
+
+#include "caffe/common.hpp"
+#include "caffe/syncedmem.hpp"
+
+namespace caffe {
+
+namespace blobmath {
+
+
+// Decaf gemm provides a simpler interface to the gemm functions, with the
+// limitation that the data has to be contiguous in memory.
+template <class Brewer, typename Dtype>
+void gemm(const CBLAS_TRANSPOSE TransA,
+    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
+    const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
+    Dtype* C);
+
+
+
+
+template <typename Dtype>
+void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+    const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+    Dtype* y);
+
+template <typename Dtype>
+void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
+    const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
+    Dtype* y);
+
+template <typename Dtype>
+void caffe_axpy(const int N, const Dtype alpha, const Dtype* X,
+    Dtype* Y);
+
+template <typename Dtype>
+void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X,
+    Dtype* Y);
+
+template <typename Dtype>
+void caffe_axpby(const int N, const Dtype alpha, const Dtype* X,
+    const Dtype beta, Dtype* Y);
+
+template <typename Dtype>
+void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
+    const Dtype beta, Dtype* Y);
+
+template <typename Dtype>
+void caffe_copy(const int N, const Dtype *X, Dtype *Y);
+
+template <typename Dtype>
+void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y);
+
+template <typename Dtype>
+void caffe_scal(const int N, const Dtype alpha, Dtype *X);
+
+template <typename Dtype>
+void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X);
+
+template <typename Dtype>
+void caffe_sqr(const int N, const Dtype* a, Dtype* y);
+
+template <typename Dtype>
+void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
+void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
+void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
+void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
+void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
+void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
+
+template <typename Dtype>
+void caffe_vRngUniform(const int n, Dtype* r, const Dtype a, const Dtype b);
+
+template <typename Dtype>
+void caffe_vRngGaussian(const int n, Dtype* r, const Dtype a,
+    const Dtype sigma);
+
+template <typename Dtype>
+void caffe_exp(const int n, const Dtype* a, Dtype* y);
+
+template <typename Dtype>
+Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
+
+template <typename Dtype>
+void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
+
+
+} // namespace blobmath
+
+}  // namespace caffe
+\ No newline at end of file
author	Yangqing Jia <jiayq84@gmail.com>
	Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)
committer	Yangqing Jia <jiayq84@gmail.com>
	Thu, 10 Oct 2013 16:08:46 +0000 (09:08 -0700)
src/caffe/common.hpp		patch \| blob \| history
src/caffe/optimization/solver.cpp		patch \| blob \| history
src/caffe/proto/caffe.proto		patch \| blob \| history
src/caffe/util/blob_math.cpp.working	[new file with mode: 0644]	patch \| blob
src/caffe/util/blob_math.hpp	[new file with mode: 0644]	patch \| blob