net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
}
- LOG(ERROR) << "Setting up the layers.";
+ LOG(INFO) << "Setting up the layers.";
for (int i = 0; i < layers_.size(); ++i) {
LOG(INFO) << "Setting up " << layer_names_[i];
layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
for (int j = 0; j < layer_blobs.size(); ++j) {
params_.push_back(layer_blobs[j]);
}
+ // push the learning rate multipliers
+ if (layers_[i]->layer_param().blobs_lr_size()) {
+ CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ float local_lr = layers_[i]->layer_param().blobs_lr(j);
+ CHECK_GT(local_lr, 0.);
+ params_lr_.push_back(local_lr);
+ }
+ } else {
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ params_lr_.push_back(1.);
+ }
+ }
for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
<< top_vecs_[i][topid]->height() << " "
<< top_vecs_[i][topid]->width();
}
}
-
- LOG(ERROR) << "Network initialization done.";
+ LOG(INFO) << "Network initialization done.";
}
template <typename Dtype>
inline vector<vector<Blob<Dtype>*> >& bottom_vecs() { return bottom_vecs_; }
inline vector<vector<Blob<Dtype>*> >& top_vecs() { return top_vecs_; }
// returns the parameters
- vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
+ inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
+ // returns the parameter learning rate multipliers
+ inline vector<float>& params_lr() { return params_lr_; }
// Updates the network
void Update();
string name_;
// The parameters in the network.
vector<shared_ptr<Blob<Dtype> > > params_;
-
+ // The learning rate multipliers for the parameters.
+ vector<float> params_lr_;
DISABLE_COPY_AND_ASSIGN(Net);
};
namespace caffe {
-
template <typename Dtype>
void Solver<Dtype>::Solve(Net<Dtype>* net) {
net_ = net;
template <typename Dtype>
void SGDSolver<Dtype>::ComputeUpdateValue() {
vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
+ vector<float>& net_params_lr = this->net_->params_lr();
// get the learning rate
Dtype rate = GetLearningRate();
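  // (With the "inv" policy used by the tests below, this is typically
  //  base_lr * (1 + gamma * iter) ^ (-power).)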
if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
Dtype momentum = this->param_.momentum();
Dtype weight_decay = this->param_.weight_decay();
// LOG(ERROR) << "rate:" << rate << " momentum:" << momentum
- // << " weight_decay:" << weight_decay;
+ // << " weight_decay:" << weight_decay;
switch (Caffe::mode()) {
case Caffe::CPU:
for (int param_id = 0; param_id < net_params.size(); ++param_id) {
// Compute the value to history, and then copy them to the blob's diff.
- caffe_axpby(net_params[param_id]->count(), rate,
+ Dtype local_rate = rate * net_params_lr[param_id];
+ caffe_axpby(net_params[param_id]->count(), local_rate,
net_params[param_id]->cpu_diff(), momentum,
history_[param_id]->mutable_cpu_data());
if (weight_decay) {
// add weight decay
- caffe_axpy(net_params[param_id]->count(), weight_decay * rate,
+ caffe_axpy(net_params[param_id]->count(),
+ weight_decay * local_rate,
net_params[param_id]->cpu_data(),
history_[param_id]->mutable_cpu_data());
}
case Caffe::GPU:
for (int param_id = 0; param_id < net_params.size(); ++param_id) {
// Compute the value to history, and then copy them to the blob's diff.
- caffe_gpu_axpby(net_params[param_id]->count(), rate,
+ Dtype local_rate = rate * net_params_lr[param_id];
+ caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
net_params[param_id]->gpu_diff(), momentum,
history_[param_id]->mutable_gpu_data());
if (weight_decay) {
// add weight decay
- caffe_gpu_axpy(net_params[param_id]->count(), weight_decay * rate,
+ caffe_gpu_axpy(net_params[param_id]->count(),
+ weight_decay * local_rate,
net_params[param_id]->gpu_data(),
history_[param_id]->mutable_gpu_data());
}
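In both branches the effect, for each parameter blob, is history = momentum * history + local_rate * (diff + weight_decay * data) with local_rate = rate * blobs_lr, after which the history is copied into the blob's diff. A minimal self-contained sketch of that update with plain vectors (illustrative only; it stands in for the caffe_axpby/caffe_axpy calls, not for any actual Caffe API):

#include <vector>

// Momentum SGD update for one parameter blob, mirroring ComputeUpdateValue.
// 'data' holds the parameter values, 'diff' the gradient, and 'history' the
// accumulated momentum buffer that is later copied into the blob's diff.
void ComputeUpdateValueSketch(const std::vector<float>& data,
                              const std::vector<float>& diff,
                              float rate, float blob_lr, float momentum,
                              float weight_decay,
                              std::vector<float>* history) {
  const float local_rate = rate * blob_lr;  // per-blob learning rate
  for (size_t i = 0; i < data.size(); ++i) {
    // history = local_rate * diff + momentum * history   (caffe_axpby)
    (*history)[i] = local_rate * diff[i] + momentum * (*history)[i];
    // history += weight_decay * local_rate * data         (caffe_axpy)
    (*history)[i] += weight_decay * local_rate * data[i];
  }
}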
// The blobs containing the numeric parameters of the layer
repeated BlobProto blobs = 50;
+ // The multiplier on the global learning rate for this blob. If you want to
+ // set the multiplier for one blob, you need to set it for all blobs.
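+ // For example, a convolution layer typically uses blobs_lr: 1. for its first
+ // blob (the weights) and blobs_lr: 2. for its second blob (the bias), as in
+ // the imagenet definition below.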
+ repeated float blobs_lr = 51;
}
message LayerConnection {
typedef ::testing::Types<float, double> Dtypes;
TYPED_TEST_CASE(SolverTest, Dtypes);
-TYPED_TEST(SolverTest, TestSolve) {
+TYPED_TEST(SolverTest, TestSolveGPU) {
Caffe::set_mode(Caffe::GPU);
NetParameter net_param;
EXPECT_EQ(caffe_net.blob_names().size(), 3);
// Run the network without training.
- LOG(ERROR) << "Performing Forward";
+ LOG(INFO) << "Performing Forward";
caffe_net.Forward(bottom_vec);
- LOG(ERROR) << "Performing Backward";
- LOG(ERROR) << "Initial loss: " << caffe_net.Backward();
+ LOG(INFO) << "Performing Backward";
+ LOG(INFO) << "Initial loss: " << caffe_net.Backward();
SolverParameter solver_param;
solver_param.set_base_lr(0.1);
solver_param.set_power(0.75);
solver_param.set_momentum(0.9);
- LOG(ERROR) << "Starting Optimization";
+ LOG(INFO) << "Starting Optimization";
SGDSolver<TypeParam> solver(solver_param);
solver.Solve(&caffe_net);
- LOG(ERROR) << "Optimization Done.";
- LOG(ERROR) << "Weight: " << caffe_net.params()[0]->cpu_data()[0] << ", "
+ LOG(INFO) << "Optimization Done.";
+ LOG(INFO) << "Weight: " << caffe_net.params()[0]->cpu_data()[0] << ", "
<< caffe_net.params()[0]->cpu_data()[1];
- LOG(ERROR) << "Bias: " << caffe_net.params()[1]->cpu_data()[0];
+ LOG(INFO) << "Bias: " << caffe_net.params()[1]->cpu_data()[0];
+
+ EXPECT_GE(caffe_net.params()[0]->cpu_data()[0], 0.3);
+ EXPECT_LE(caffe_net.params()[0]->cpu_data()[0], 0.35);
+
+ EXPECT_GE(caffe_net.params()[0]->cpu_data()[1], 0.3);
+ EXPECT_LE(caffe_net.params()[0]->cpu_data()[1], 0.35);
+
+ EXPECT_GE(caffe_net.params()[1]->cpu_data()[0], -0.01);
+ EXPECT_LE(caffe_net.params()[1]->cpu_data()[0], 0.01);
+}
+
+TYPED_TEST(SolverTest, TestSolveCPU) {
+ Caffe::set_mode(Caffe::CPU);
+
+ NetParameter net_param;
+ ReadProtoFromTextFile("data/linear_regression.prototxt",
+ &net_param);
+ // check if things are right
+ EXPECT_EQ(net_param.layers_size(), 3);
+ EXPECT_EQ(net_param.input_size(), 0);
+ vector<Blob<TypeParam>*> bottom_vec;
+ Net<TypeParam> caffe_net(net_param, bottom_vec);
+ EXPECT_EQ(caffe_net.layer_names().size(), 3);
+ EXPECT_EQ(caffe_net.blob_names().size(), 3);
+
+ // Run the network without training.
+ LOG(INFO) << "Performing Forward";
+ caffe_net.Forward(bottom_vec);
+ LOG(INFO) << "Performing Backward";
+ LOG(INFO) << "Initial loss: " << caffe_net.Backward();
+
+ SolverParameter solver_param;
+ solver_param.set_base_lr(0.1);
+ solver_param.set_display(0);
+ solver_param.set_max_iter(100);
+ solver_param.set_lr_policy("inv");
+ solver_param.set_gamma(1.);
+ solver_param.set_power(0.75);
+ solver_param.set_momentum(0.9);
+
+ LOG(INFO) << "Starting Optimization";
+ SGDSolver<TypeParam> solver(solver_param);
+ solver.Solve(&caffe_net);
+ LOG(INFO) << "Optimization Done.";
+ LOG(INFO) << "Weight: " << caffe_net.params()[0]->cpu_data()[0] << ", "
+ << caffe_net.params()[0]->cpu_data()[1];
+ LOG(INFO) << "Bias: " << caffe_net.params()[1]->cpu_data()[0];
EXPECT_GE(caffe_net.params()[0]->cpu_data()[0], 0.3);
EXPECT_LE(caffe_net.params()[0]->cpu_data()[0], 0.35);
// all the intermediate blobs produced by the net to individual binary
// files stored in protobuffer binary formats.
// Usage:
-// dump_network input_net_param trained_net_param input_blob output_prefix
+// dump_network input_net_param trained_net_param input_blob output_prefix 0/1
+ // If input_net_param is 'none', the network definition is loaded directly
+ // from trained_net_param. If input_blob is 'none', the network is run with
+ // no input blobs. If the last argument is 1, a forward-backward pass is run
+ // before dumping everything, and the whole network is dumped as well.
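+ // Example invocation (file names here are hypothetical):
+ //   dump_network none trained.binaryproto input.binaryproto /tmp/dump_ 1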
#include <cuda_runtime.h>
#include <fcntl.h>
NetParameter net_param;
NetParameter trained_net_param;
- ReadProtoFromTextFile(argv[1], &net_param);
- ReadProtoFromBinaryFile(argv[2], &trained_net_param);
- BlobProto input_blob_proto;
- ReadProtoFromBinaryFile(argv[3], &input_blob_proto);
- shared_ptr<Blob<float> > input_blob(new Blob<float>());
- input_blob->FromProto(input_blob_proto);
+ if (strcmp(argv[1], "none") == 0) {
+ // We directly load the net param from trained file
+ ReadProtoFromBinaryFile(argv[2], &net_param);
+ } else {
+ ReadProtoFromTextFile(argv[1], &net_param);
+ }
+ ReadProtoFromBinaryFile(argv[2], &trained_net_param);
+
vector<Blob<float>* > input_vec;
- input_vec.push_back(input_blob.get());
- // For implementational reasons, we need to first set up the net, and
- // then copy the trained parameters.
+ if (strcmp(argv[3], "none") != 0) {
+ BlobProto input_blob_proto;
+ ReadProtoFromBinaryFile(argv[3], &input_blob_proto);
+ shared_ptr<Blob<float> > input_blob(new Blob<float>());
+ input_blob->FromProto(input_blob_proto);
+ input_vec.push_back(input_blob.get());
+ }
+
shared_ptr<Net<float> > caffe_net(new Net<float>(net_param, input_vec));
caffe_net->CopyTrainedLayersFrom(trained_net_param);
+ string output_prefix(argv[4]);
// Run the network without training.
LOG(ERROR) << "Performing Forward";
caffe_net->Forward(input_vec);
-
+ if (argc > 5 && strcmp(argv[5], "1") == 0) {
+ LOG(ERROR) << "Performing Backward";
+ caffe_net->Backward();
+ // Dump the network
+ NetParameter output_net_param;
+ caffe_net->ToProto(&output_net_param, true);
+ WriteProtoToBinaryFile(output_net_param,
+     output_prefix + output_net_param.name());
+ }
// Now, let's dump all the layers
- string output_prefix(argv[4]);
+
const vector<string>& blob_names = caffe_net->blob_names();
const vector<shared_ptr<Blob<float> > >& blobs = caffe_net->blobs();
for (int blobid = 0; blobid < caffe_net->blobs().size(); ++blobid) {
WriteProtoToBinaryFile(output_blob_proto, output_prefix + blob_names[blobid]);
}
- // Dump results.
return 0;
}
name: "data"
type: "data"
source: "/home/jiayq/caffe-train-leveldb"
- batchsize: 96
+ batchsize: 128
subtraction: 114
cropsize: 227
- mirror: false
+ mirror: true
}
top: "data"
top: "label"
type: "constant"
value: 0
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "data"
top: "conv1"
type: "constant"
value: 1
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "pad2"
top: "conv2"
type: "constant"
value: 0
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "pad3"
top: "conv3"
type: "constant"
value: 1
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "pad4"
top: "conv4"
type: "constant"
value: 1
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "pad5"
top: "conv5"
type: "constant"
value: 1
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "pool5"
top: "fc6"
type: "constant"
value: 1
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "drop6"
top: "fc7"
type: "constant"
value: 0
}
+ blobs_lr: 1.
+ blobs_lr: 2.
}
bottom: "drop7"
top: "fc8"