misc update

author Yangqing Jia <jiayq84@gmail.com>

Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)

committer Yangqing Jia <jiayq84@gmail.com>

Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)
author Yangqing Jia <jiayq84@gmail.com>
Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)
diff --git a/src/Makefile b/src/Makefile

index deebe75..d78e99b 100644 (file)
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,7 +3,7 @@
  # a lowercase prefix (in this case "program") and an uppercased suffix (in this case "NAME"), separated
  # by an underscore is used to name attributes for a common element. Think of this like
  # using program.NAME, program.C_SRCS, etc. There are no structs in Make, so we use this convention
-# to keep track of attributes that all belong to the same target or program.  
+# to keep track of attributes that all belong to the same target or program.
  #
  PROJECT := caffe
  NAME := lib$(PROJECT).so
@@ -61,7 +61,7 @@ test: $(OBJS) $(GTEST_OBJ) $(TEST_BINS)
  program: $(OBJS) $(PROGRAM_BINS)
  
  runtest: test
-       for testbin in $(TEST_BINS); do $$testbin; done
+       for testbin in $(TEST_BINS); do $$testbin 1; done
  
  $(TEST_BINS): %.testbin : %.o
         $(CXX) -pthread $< $(OBJS) $(GTEST_OBJ) -o $@ $(LDFLAGS) $(WARNINGS)
diff --git a/src/caffe/filler.hpp b/src/caffe/filler.hpp

index 99cb5bc..effe62f 100644 (file)
--- a/src/caffe/filler.hpp
+++ b/src/caffe/filler.hpp
@@ -103,8 +103,8 @@ class PositiveUnitballFiller : public Filler<Dtype> {
  //
  // It fills the incoming matrix by randomly sampling uniform data from
  // [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes, and in our case we consider the blob width as the scale.
-// You should make sure the input blob has shape (1, 1, height, width).
+// of input nodes. You should make sure the input blob has shape (num, a, b, c)
+// where a * b * c = fan_in.
  template <typename Dtype>
  class XavierFiller : public Filler<Dtype> {
   public:
@@ -112,10 +112,7 @@ class XavierFiller : public Filler<Dtype> {
        : Filler<Dtype>(param) {}
    virtual void Fill(Blob<Dtype>* blob) {
      CHECK(blob->count());
-    CHECK_EQ(blob->num(), 1) << "XavierFiller requires blob.num() = 1.";
-    CHECK_EQ(blob->channels(), 1)
-        << "XavierFiller requires blob.channels() = 1.";
-    int fan_in = blob->width();
+    int fan_in = blob->count() / blob->num();
      Dtype scale = sqrt(Dtype(3) / fan_in);
      caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
          -scale, scale);
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp

index 9560e47..8bf913a 100644 (file)
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -45,7 +45,8 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      this->blobs_.resize(1);
    }
   // Initialize the weight
-  this->blobs_[0].reset(new Blob<Dtype>(1, 1, NUM_OUTPUT_, K_));
+  this->blobs_[0].reset(
+      new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(
        GetFiller<Dtype>(this->layer_param_.weight_filler()));
diff --git a/src/caffe/optimization/solver.cpp b/src/caffe/optimization/solver.cpp

index cb288b3..2b2656d 100644 (file)
--- a/src/caffe/optimization/solver.cpp
+++ b/src/caffe/optimization/solver.cpp
@@ -31,9 +31,8 @@ void Solver<Dtype>::Solve(Net<Dtype>* net) {
      net_->Update();
  
      // Check if we need to do snapshot
-    if (param_.snapshot() > 0 && iter_ % param_.snapshot()) {
-      // TODO(Yangqing): snapshot
-      NOT_IMPLEMENTED;
+    if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
+      Snapshot(false);
      }
      if (param_.display()) {
        LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
@@ -53,8 +52,10 @@ void Solver<Dtype>::Snapshot(bool is_final) {
    } else {
      ss << "_iter_" << iter_;
    }
+  string filename = ss.str();
+  LOG(ERROR) << "Snapshotting to " << filename;
    ofstream output_file;
-  output_file.open(ss.str().c_str());
+  output_file.open(filename.c_str());
    CHECK(net_param.SerializeToOstream(&output_file));
    output_file.close();
  }
diff --git a/src/caffe/pyutil/convert.py b/src/caffe/pyutil/convert.py

index 8a76a50..6bc76ae 100644 (file)
--- a/src/caffe/pyutil/convert.py
+++ b/src/caffe/pyutil/convert.py
@@ -5,8 +5,8 @@ from caffe.proto import caffe_pb2
  import numpy as np
  
  def blobproto_to_array(blob):
-  arr = np.array(blob.data).reshape(blob.num(), blob.channels(), blobs.height(),
-      blobs.width())
+  arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
+      blob.width)
    return arr
  
  def array_to_blobproto(arr):
@@ -14,7 +14,7 @@ def array_to_blobproto(arr):
      raise ValueError('Incorrect array shape.')
    blob = caffe_pb2.BlobProto()
    blob.num, blob.channels, blob.height, blob.width = arr.shape;
-  blob.data.extend(arr.flat)
+  blob.data.extend(arr.astype(float).flat)
    return blob
  
  def array_to_datum(arr):
diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp

index c991d8d..e6600f2 100644 (file)
--- a/src/caffe/syncedmem.cpp
+++ b/src/caffe/syncedmem.cpp
@@ -59,7 +59,6 @@ inline void SyncedMemory::to_gpu() {
    }
  }
  
-
  const void* SyncedMemory::cpu_data() {
    to_cpu();
    return (const void*)cpu_ptr_;
diff --git a/src/programs/train_alexnet.cpp b/src/programs/train_alexnet.cpp

index d6a4ca5..d908859 100644 (file)
--- a/src/programs/train_alexnet.cpp
+++ b/src/programs/train_alexnet.cpp
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
    //solver_param.set_power(0.75);
    solver_param.set_momentum(0.9);
    solver_param.set_weight_decay(0.0005);
+  solver_param.set_snapshot(100);
+  solver_param.set_snapshot_prefix("alexnet");
  
    LOG(ERROR) << "Starting Optimization";
    SGDSolver<float> solver(solver_param);
author	Yangqing Jia <jiayq84@gmail.com>
	Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)
committer	Yangqing Jia <jiayq84@gmail.com>
	Tue, 8 Oct 2013 00:38:03 +0000 (17:38 -0700)
src/Makefile		patch \| blob \| history
src/caffe/filler.hpp		patch \| blob \| history
src/caffe/layers/conv_layer.cpp		patch \| blob \| history
src/caffe/optimization/solver.cpp		patch \| blob \| history
src/caffe/pyutil/convert.py		patch \| blob \| history
src/caffe/syncedmem.cpp		patch \| blob \| history
src/programs/train_alexnet.cpp		patch \| blob \| history