# a lowercase prefix (in this case "program") and an uppercased suffix (in this case "NAME"), separated
# by an underscore is used to name attributes for a common element. Think of this like
# using program.NAME, program.C_SRCS, etc. There are no structs in Make, so we use this convention
-# to keep track of attributes that all belong to the same target or program.
+# to keep track of attributes that all belong to the same target or program.
#
PROJECT := caffe
NAME := lib$(PROJECT).so
program: $(OBJS) $(PROGRAM_BINS)
runtest: test
- for testbin in $(TEST_BINS); do $$testbin; done
+ for testbin in $(TEST_BINS); do $$testbin 1; done
$(TEST_BINS): %.testbin : %.o
$(CXX) -pthread $< $(OBJS) $(GTEST_OBJ) -o $@ $(LDFLAGS) $(WARNINGS)
//
// It fills the incoming matrix by randomly sampling uniform data from
// [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes, and in our case we consider the blob width as the scale.
-// You should make sure the input blob has shape (1, 1, height, width).
+// of input nodes. You should make sure the input blob has shape (num, a, b, c)
+// where a * b * c = fan_in.
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
public:
: Filler<Dtype>(param) {}
virtual void Fill(Blob<Dtype>* blob) {
CHECK(blob->count());
- CHECK_EQ(blob->num(), 1) << "XavierFiller requires blob.num() = 1.";
- CHECK_EQ(blob->channels(), 1)
- << "XavierFiller requires blob.channels() = 1.";
- int fan_in = blob->width();
+ int fan_in = blob->count() / blob->num();
Dtype scale = sqrt(Dtype(3) / fan_in);
caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
-scale, scale);
this->blobs_.resize(1);
}
// Initialize the weight
- this->blobs_[0].reset(new Blob<Dtype>(1, 1, NUM_OUTPUT_, K_));
+ this->blobs_[0].reset(
+ new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
// fill the weights
shared_ptr<Filler<Dtype> > weight_filler(
GetFiller<Dtype>(this->layer_param_.weight_filler()));
net_->Update();
// Check whether we need to take a snapshot
- if (param_.snapshot() > 0 && iter_ % param_.snapshot()) {
- // TODO(Yangqing): snapshot
- NOT_IMPLEMENTED;
+ if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
+ Snapshot(false);
}
if (param_.display()) {
LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
} else {
ss << "_iter_" << iter_;
}
+ string filename = ss.str();
+ LOG(ERROR) << "Snapshotting to " << filename;
ofstream output_file;
- output_file.open(ss.str().c_str());
+ output_file.open(filename.c_str());
CHECK(net_param.SerializeToOstream(&output_file));
output_file.close();
}
import numpy as np
def blobproto_to_array(blob):
- arr = np.array(blob.data).reshape(blob.num(), blob.channels(), blobs.height(),
- blobs.width())
+ arr = np.array(blob.data).reshape(blob.num, blob.channels, blob.height,
+ blob.width)
return arr
def array_to_blobproto(arr):
raise ValueError('Incorrect array shape.')
blob = caffe_pb2.BlobProto()
blob.num, blob.channels, blob.height, blob.width = arr.shape;
- blob.data.extend(arr.flat)
+ blob.data.extend(arr.astype(float).flat)
return blob
def array_to_datum(arr):
}
}
-
const void* SyncedMemory::cpu_data() {
to_cpu();
return (const void*)cpu_ptr_;
//solver_param.set_power(0.75);
solver_param.set_momentum(0.9);
solver_param.set_weight_decay(0.0005);
+ solver_param.set_snapshot(100);
+ solver_param.set_snapshot_prefix("alexnet");
LOG(ERROR) << "Starting Optimization";
SGDSolver<float> solver(solver_param);