From 4a84388ef6139ab01813be109bbfad5a1b8840d6 Mon Sep 17 00:00:00 2001
From: Yangqing Jia
Date: Mon, 7 Oct 2013 17:38:03 -0700
Subject: [PATCH] misc update

---
 src/Makefile                      | 4 ++--
 src/caffe/filler.hpp              | 9 +++------
 src/caffe/layers/conv_layer.cpp   | 3 ++-
 src/caffe/optimization/solver.cpp | 9 +++++----
 src/caffe/pyutil/convert.py       | 6 +++---
 src/caffe/syncedmem.cpp           | 1 -
 src/programs/train_alexnet.cpp    | 2 ++
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index deebe75..d78e99b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,7 +3,7 @@
 # a lowercase prefix (in this case "program") and an uppercased suffix (in this case "NAME"), separated
 # by an underscore is used to name attributes for a common element. Think of this like
 # using program.NAME, program.C_SRCS, etc. There are no structs in Make, so we use this convention
-# to keep track of attributes that all belong to the same target or program. 
+# to keep track of attributes that all belong to the same target or program.
 #
 PROJECT := caffe
 NAME := lib$(PROJECT).so
@@ -61,7 +61,7 @@ test: $(OBJS) $(GTEST_OBJ) $(TEST_BINS)
 program: $(OBJS) $(PROGRAM_BINS)
 
 runtest: test
-	for testbin in $(TEST_BINS); do $$testbin; done
+	for testbin in $(TEST_BINS); do $$testbin 1; done
 
 $(TEST_BINS): %.testbin : %.o
 	$(CXX) -pthread $< $(OBJS) $(GTEST_OBJ) -o $@ $(LDFLAGS) $(WARNINGS)
diff --git a/src/caffe/filler.hpp b/src/caffe/filler.hpp
index 99cb5bc..effe62f 100644
--- a/src/caffe/filler.hpp
+++ b/src/caffe/filler.hpp
@@ -103,8 +103,8 @@ class PositiveUnitballFiller : public Filler<Dtype> {
 //
 // It fills the incoming matrix by randomly sampling uniform data from
 // [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes, and in our case we consider the blob width as the scale.
-// You should make sure the input blob has shape (1, 1, height, width).
+// of input nodes. You should make sure the input blob has shape (num, a, b, c)
+// where a * b * c = fan_in.
 template <typename Dtype>
 class XavierFiller : public Filler<Dtype> {
  public:
@@ -112,10 +112,7 @@ class XavierFiller : public Filler<Dtype> {
       : Filler<Dtype>(param) {}
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
-    CHECK_EQ(blob->num(), 1) << "XavierFiller requires blob.num() = 1.";
-    CHECK_EQ(blob->channels(), 1)
-        << "XavierFiller requires blob.channels() = 1.";
-    int fan_in = blob->width();
+    int fan_in = blob->count() / blob->num();
     Dtype scale = sqrt(Dtype(3) / fan_in);
     caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
         -scale, scale);
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 9560e47..8bf913a 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -45,7 +45,8 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
     this->blobs_.resize(1);
   }
   // Intialize the weight
-  this->blobs_[0].reset(new Blob<Dtype>(1, 1, NUM_OUTPUT_, K_));
+  this->blobs_[0].reset(
+      new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
   // fill the weights
   shared_ptr<Filler<Dtype> > weight_filler(
       GetFiller<Dtype>(this->layer_param_.weight_filler()));
diff --git a/src/caffe/optimization/solver.cpp b/src/caffe/optimization/solver.cpp
index cb288b3..2b2656d 100644
--- a/src/caffe/optimization/solver.cpp
+++ b/src/caffe/optimization/solver.cpp
@@ -31,9 +31,8 @@ void Solver<Dtype>::Solve(Net<Dtype>* net) {
     net_->Update();
 
     // Check if we need to do snapshot
-    if (param_.snapshot() > 0 && iter_ % param_.snapshot()) {
-      // TODO(Yangqing): snapshot
-      NOT_IMPLEMENTED;
+    if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
+      Snapshot(false);
     }
     if (param_.display()) {
       LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
@@ -53,8 +52,10 @@ void Solver<Dtype>::Snapshot(bool is_final) {
   } else {
     ss << "_iter_" << iter_;
   }
+  string filename = ss.str();
+  LOG(ERROR) << "Snapshotting to " << filename;
   ofstream output_file;
-  output_file.open(ss.str().c_str());
+  output_file.open(filename.c_str());
   CHECK(net_param.SerializeToOstream(&output_file));
   output_file.close();
 }
diff --git a/src/caffe/pyutil/convert.py b/src/caffe/pyutil/convert.py
index 8a76a50..6bc76ae 100644
--- a/src/caffe/pyutil/convert.py
+++ b/src/caffe/pyutil/convert.py
@@ -5,8 +5,8 @@ from caffe.proto import caffe_pb2
 import numpy as np
 
 def blobproto_to_array(blob):
-  arr = np.array(blob.data).reshape(blob.num(), blob.channels(), blobs.height(),
-      blobs.width())
+  arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
+      blob.width)
   return arr
 
 def array_to_blobproto(arr):
@@ -14,7 +14,7 @@ def array_to_blobproto(arr):
     raise ValueError('Incorrect array shape.')
   blob = caffe_pb2.BlobProto()
   blob.num, blob.channels, blob.height, blob.width = arr.shape;
-  blob.data.extend(arr.flat)
+  blob.data.extend(arr.astype(float).flat)
   return blob
 
 def array_to_datum(arr):
diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp
index c991d8d..e6600f2 100644
--- a/src/caffe/syncedmem.cpp
+++ b/src/caffe/syncedmem.cpp
@@ -59,7 +59,6 @@ inline void SyncedMemory::to_gpu() {
   }
 }
 
-
 const void* SyncedMemory::cpu_data() {
   to_cpu();
   return (const void*)cpu_ptr_;
diff --git a/src/programs/train_alexnet.cpp b/src/programs/train_alexnet.cpp
index d6a4ca5..d908859 100644
--- a/src/programs/train_alexnet.cpp
+++ b/src/programs/train_alexnet.cpp
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
   //solver_param.set_power(0.75);
   solver_param.set_momentum(0.9);
   solver_param.set_weight_decay(0.0005);
+  solver_param.set_snapshot(100);
+  solver_param.set_snapshot_prefix("alexnet");
 
   LOG(ERROR) << "Starting Optimization";
   SGDSolver<float> solver(solver_param);
-- 
2.7.4
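
Note on the filler.hpp hunks: with the relaxed shape checks, a weight blob of
shape (num, a, b, c) gets fan_in = a * b * c = count / num, and weights are
sampled uniformly from [-sqrt(3 / fan_in), sqrt(3 / fan_in)]. A minimal NumPy
sketch of the same sampling rule (illustration only, not Caffe code; the name
xavier_fill is made up here):

    import numpy as np

    def xavier_fill(shape):
        # fan_in covers everything except the leading num dimension,
        # mirroring fan_in = blob->count() / blob->num() in the patch.
        fan_in = int(np.prod(shape[1:]))
        scale = np.sqrt(3.0 / fan_in)
        return np.random.uniform(-scale, scale, size=shape)

    # Conv weights are now shaped (NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_,
    # KSIZE_), so e.g. a 96 x 3 x 11 x 11 blob has fan_in = 3 * 11 * 11 = 363.
    w = xavier_fill((96, 3, 11, 11))
    assert abs(w).max() <= np.sqrt(3.0 / 363)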
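
Note on the first solver.cpp hunk: the old test iter_ % param_.snapshot() is
non-zero (i.e. true) exactly when iter_ is *not* a multiple of the snapshot
interval, so the NOT_IMPLEMENTED stub would have fired on nearly every
iteration; with == 0 the solver snapshots once every snapshot() iterations.
A quick check of the fixed schedule (plain Python, hypothetical numbers):

    interval = 100  # matches solver_param.set_snapshot(100) in train_alexnet.cpp
    fires = [it for it in range(1, 501) if it % interval == 0]
    assert fires == [100, 200, 300, 400, 500]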
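
Note on the convert.py hunks: num, channels, height, and width are fields on
the BlobProto message, not methods, and the old code also referred to a
misspelled blobs. A round-trip sanity check, assuming the caffe_pb2 bindings
are built and the module is importable as caffe.pyutil.convert:

    import numpy as np
    from caffe.pyutil.convert import array_to_blobproto, blobproto_to_array

    arr = np.random.rand(2, 3, 4, 5)   # must be 4-D per the shape check
    blob = array_to_blobproto(arr)
    back = blobproto_to_array(blob)
    assert back.shape == arr.shape
    # data is a repeated float field, so allow for float32 rounding.
    assert np.allclose(arr, back, atol=1e-6)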