From 4a84388ef6139ab01813be109bbfad5a1b8840d6 Mon Sep 17 00:00:00 2001
From: Yangqing Jia
Date: Mon, 7 Oct 2013 17:38:03 -0700
Subject: [PATCH] misc update

---
 src/Makefile                      | 4 ++--
 src/caffe/filler.hpp              | 9 +++------
 src/caffe/layers/conv_layer.cpp   | 3 ++-
 src/caffe/optimization/solver.cpp | 9 +++++----
 src/caffe/pyutil/convert.py       | 6 +++---
 src/caffe/syncedmem.cpp           | 1 -
 src/programs/train_alexnet.cpp    | 2 ++
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index deebe75..d78e99b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -3,7 +3,7 @@
 # a lowercase prefix (in this case "program") and an uppercased suffix (in this case "NAME"), separated
 # by an underscore is used to name attributes for a common element. Think of this like
 # using program.NAME, program.C_SRCS, etc. There are no structs in Make, so we use this convention
-# to keep track of attributes that all belong to the same target or program. 
+# to keep track of attributes that all belong to the same target or program.
 #
 PROJECT := caffe
 NAME := lib$(PROJECT).so
@@ -61,7 +61,7 @@ test: $(OBJS) $(GTEST_OBJ) $(TEST_BINS)
 program: $(OBJS) $(PROGRAM_BINS)
 
 runtest: test
-	for testbin in $(TEST_BINS); do $$testbin; done
+	for testbin in $(TEST_BINS); do $$testbin 1; done
 
 $(TEST_BINS): %.testbin : %.o
 	$(CXX) -pthread $< $(OBJS) $(GTEST_OBJ) -o $@ $(LDFLAGS) $(WARNINGS)
diff --git a/src/caffe/filler.hpp b/src/caffe/filler.hpp
index 99cb5bc..effe62f 100644
--- a/src/caffe/filler.hpp
+++ b/src/caffe/filler.hpp
@@ -103,8 +103,8 @@ class PositiveUnitballFiller : public Filler<Dtype> {
 //
 // It fills the incoming matrix by randomly sampling uniform data from
 // [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes, and in our case we consider the blob width as the scale.
-// You should make sure the input blob has shape (1, 1, height, width).
+// of input nodes. You should make sure the input blob has shape (num, a, b, c)
+// where a * b * c = fan_in.
 template <typename Dtype>
 class XavierFiller : public Filler<Dtype> {
  public:
@@ -112,10 +112,7 @@ class XavierFiller : public Filler<Dtype> {
       : Filler<Dtype>(param) {}
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
-    CHECK_EQ(blob->num(), 1) << "XavierFiller requires blob.num() = 1.";
-    CHECK_EQ(blob->channels(), 1)
-        << "XavierFiller requires blob.channels() = 1.";
-    int fan_in = blob->width();
+    int fan_in = blob->count() / blob->num();
     Dtype scale = sqrt(Dtype(3) / fan_in);
     caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
         -scale, scale);
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 9560e47..8bf913a 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -45,7 +45,8 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
     this->blobs_.resize(1);
   }
   // Intialize the weight
-  this->blobs_[0].reset(new Blob<Dtype>(1, 1, NUM_OUTPUT_, K_));
+  this->blobs_[0].reset(
+      new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
   // fill the weights
   shared_ptr<Filler<Dtype> > weight_filler(
       GetFiller<Dtype>(this->layer_param_.weight_filler()));
diff --git a/src/caffe/optimization/solver.cpp b/src/caffe/optimization/solver.cpp
index cb288b3..2b2656d 100644
--- a/src/caffe/optimization/solver.cpp
+++ b/src/caffe/optimization/solver.cpp
@@ -31,9 +31,8 @@ void Solver<Dtype>::Solve(Net<Dtype>* net) {
     net_->Update();
 
     // Check if we need to do snapshot
-    if (param_.snapshot() > 0 && iter_ % param_.snapshot()) {
-      // TODO(Yangqing): snapshot
-      NOT_IMPLEMENTED;
+    if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
+      Snapshot(false);
     }
     if (param_.display()) {
       LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
@@ -53,8 +52,10 @@ void Solver<Dtype>::Snapshot(bool is_final) {
   } else {
     ss << "_iter_" << iter_;
   }
+  string filename = ss.str();
+  LOG(ERROR) << "Snapshotting to " << filename;
   ofstream output_file;
-  output_file.open(ss.str().c_str());
+  output_file.open(filename.c_str());
   CHECK(net_param.SerializeToOstream(&output_file));
   output_file.close();
 }
diff --git a/src/caffe/pyutil/convert.py b/src/caffe/pyutil/convert.py
index 8a76a50..6bc76ae 100644
--- a/src/caffe/pyutil/convert.py
+++ b/src/caffe/pyutil/convert.py
@@ -5,8 +5,8 @@ from caffe.proto import caffe_pb2
 import numpy as np
 
 def blobproto_to_array(blob):
-  arr = np.array(blob.data).reshape(blob.num(), blob.channels(), blobs.height(),
-      blobs.width())
+  arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
+      blob.width)
   return arr
 
 def array_to_blobproto(arr):
@@ -14,7 +14,7 @@ def array_to_blobproto(arr):
     raise ValueError('Incorrect array shape.')
   blob = caffe_pb2.BlobProto()
   blob.num, blob.channels, blob.height, blob.width = arr.shape;
-  blob.data.extend(arr.flat)
+  blob.data.extend(arr.astype(float).flat)
   return blob
 
 def array_to_datum(arr):
diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp
index c991d8d..e6600f2 100644
--- a/src/caffe/syncedmem.cpp
+++ b/src/caffe/syncedmem.cpp
@@ -59,7 +59,6 @@ inline void SyncedMemory::to_gpu() {
   }
 }
 
-
 const void* SyncedMemory::cpu_data() {
   to_cpu();
   return (const void*)cpu_ptr_;
diff --git a/src/programs/train_alexnet.cpp b/src/programs/train_alexnet.cpp
index d6a4ca5..d908859 100644
--- a/src/programs/train_alexnet.cpp
+++ b/src/programs/train_alexnet.cpp
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
   //solver_param.set_power(0.75);
   solver_param.set_momentum(0.9);
   solver_param.set_weight_decay(0.0005);
+  solver_param.set_snapshot(100);
+  solver_param.set_snapshot_prefix("alexnet");
 
   LOG(ERROR) << "Starting Optimization";
   SGDSolver<float> solver(solver_param);
-- 
2.7.4
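
Note on the filler.hpp hunks: with the relaxed shape checks, a weight blob of
shape (num, a, b, c) gets fan_in = a * b * c = count / num, and weights are
sampled uniformly from [-sqrt(3 / fan_in), sqrt(3 / fan_in)]. A minimal NumPy
sketch of the same sampling rule (illustration only, not Caffe code; the name
xavier_fill is made up here):

    import numpy as np

    def xavier_fill(shape):
        # fan_in covers everything except the leading num dimension,
        # mirroring fan_in = blob->count() / blob->num() in the patch.
        fan_in = int(np.prod(shape[1:]))
        scale = np.sqrt(3.0 / fan_in)
        return np.random.uniform(-scale, scale, size=shape)

    # Conv weights are now shaped (NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_,
    # KSIZE_), so e.g. a 96 x 3 x 11 x 11 blob has fan_in = 3 * 11 * 11 = 363.
    w = xavier_fill((96, 3, 11, 11))
    assert abs(w).max() <= np.sqrt(3.0 / 363)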
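
Note on the first solver.cpp hunk: the old test iter_ % param_.snapshot() is
non-zero (i.e. true) exactly when iter_ is *not* a multiple of the snapshot
interval, so the NOT_IMPLEMENTED stub would have fired on nearly every
iteration; with == 0 the solver snapshots once every snapshot() iterations.
A quick check of the fixed schedule (plain Python, hypothetical numbers):

    interval = 100  # matches solver_param.set_snapshot(100) in train_alexnet.cpp
    fires = [it for it in range(1, 501) if it % interval == 0]
    assert fires == [100, 200, 300, 400, 500]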
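
Note on the convert.py hunks: num, channels, height, and width are fields on
the BlobProto message, not methods, and the old code also referred to a
misspelled blobs. A round-trip sanity check, assuming the caffe_pb2 bindings
are built and the module is importable as caffe.pyutil.convert:

    import numpy as np
    from caffe.pyutil.convert import array_to_blobproto, blobproto_to_array

    arr = np.random.rand(2, 3, 4, 5)   # must be 4-D per the shape check
    blob = array_to_blobproto(arr)
    back = blobproto_to_array(blob)
    assert back.shape == arr.shape
    # data is a repeated float field, so allow for float32 rounding.
    assert np.allclose(arr, back, atol=1e-6)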