From: Yangqing Jia
Date: Mon, 7 Oct 2013 19:35:13 +0000 (-0700)
Subject: misc update
X-Git-Tag: submit/tizen/20180823.020014~960
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc520bdf619ce0d3bbfefb0b4b7e67fd0a27dc2e;p=platform%2Fupstream%2Fcaffeonacl.git

misc update
---

diff --git a/src/caffe/layer.hpp b/src/caffe/layer.hpp
index 9898cbc..2b73daf 100644
--- a/src/caffe/layer.hpp
+++ b/src/caffe/layer.hpp
@@ -34,11 +34,13 @@ class Layer {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 
-  // Returns the vector of parameters.
-  vector<shared_ptr<Blob<Dtype> > >& params() {
+  // Returns the vector of blobs.
+  vector<shared_ptr<Blob<Dtype> > >& blobs() {
     return blobs_;
   }
+  // Returns the layer parameter
+  const LayerParameter& layer_param() { return layer_param_; }
 
   // Writes the layer parameter to a protocol buffer
   virtual void ToProto(LayerParameter* param, bool write_diff = false);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index c0ccbb1..6b5e4af 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -89,9 +89,14 @@ Net<Dtype>::Net(const NetParameter& param,
   for (int i = 0; i < layers_.size(); ++i) {
     LOG(INFO) << "Setting up " << layer_names_[i];
     layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
-    vector<shared_ptr<Blob<Dtype> > >& layer_params = layers_[i]->params();
-    for (int j = 0; j < layer_params.size(); ++j) {
-      params_.push_back(layer_params[j]);
+    vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
+    for (int j = 0; j < layer_blobs.size(); ++j) {
+      params_.push_back(layer_blobs[j]);
+    }
+    for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
+      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
+          << top_vecs_[i][topid]->height() << " "
+          << top_vecs_[i][topid]->width();
     }
   }
 
@@ -106,7 +111,7 @@ const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
     blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);
   }
   for (int i = 0; i < layers_.size(); ++i) {
-    //LOG(ERROR) << "Forwarding " << layer_names_[i];
+    // LOG(ERROR) << "Forwarding " << layer_names_[i];
     layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
   }
   return net_output_blobs_;
@@ -141,7 +146,7 @@ void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
     }
     LOG(INFO) << "Loading source layer " << source_layer_name;
     vector<shared_ptr<Blob<Dtype> > >& target_blobs =
-        layers_[target_layer_id]->params();
+        layers_[target_layer_id]->blobs();
     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
         << "Incompatible number of blobs for layer " << source_layer_name;
     for (int j = 0; j < target_blobs.size(); ++j) {
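A quick usage sketch, not part of the commit itself: after this rename, a layer's learnable blobs are exposed via blobs() and the proto it was constructed from via layer_param(), while Net flattens every layer's blobs into params_ for the solver. The helper below is hypothetical and assumes only the accessors shown above plus Blob::count() and LayerParameter::name() from the surrounding codebase:

// Illustration only: log each learnable blob of a layer through the
// accessors introduced in this commit.
template <typename Dtype>
void LogLayerBlobs(Layer<Dtype>& layer) {
  const LayerParameter& param = layer.layer_param();
  vector<shared_ptr<Blob<Dtype> > >& weights = layer.blobs();
  for (int i = 0; i < weights.size(); ++i) {
    LOG(INFO) << param.name() << " blob " << i << ": "
        << weights[i]->count() << " values";
  }
}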
diff --git a/src/caffe/optimization/solver.cpp b/src/caffe/optimization/solver.cpp
index a48408c..cb288b3 100644
--- a/src/caffe/optimization/solver.cpp
+++ b/src/caffe/optimization/solver.cpp
@@ -19,7 +19,8 @@ namespace caffe {
 template <typename Dtype>
 void Solver<Dtype>::Solve(Net<Dtype>* net) {
   net_ = net;
-  LOG(INFO) << "Solving net " << net_->name();
+  LOG(INFO) << "Solving " << net_->name();
+  PreSolve();
   iter_ = 0;
   // For a network that is trained by the solver, no bottom or top vecs
   // should be given, and we will just provide dummy vecs.
@@ -79,10 +80,11 @@ Dtype SGDSolver<Dtype>::GetLearningRate() {
 }
 
 template <typename Dtype>
-void SGDSolver<Dtype>::ComputeUpdateValue() {
+void SGDSolver<Dtype>::PreSolve() {
   // First of all, see if we need to initialize the history
   vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
-  if (history_.size() == 0 && this->param_.momentum() > 0) {
+  history_.clear();
+  if (this->param_.momentum() > 0) {
     for (int i = 0; i < net_params.size(); ++i) {
       const Blob<Dtype>* net_param = net_params[i].get();
       history_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(
@@ -90,45 +92,54 @@
           net_param->width())));
     }
   }
+}
+
+template <typename Dtype>
+void SGDSolver<Dtype>::ComputeUpdateValue() {
+  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
   // get the learning rate
   Dtype rate = GetLearningRate();
-  if (this->param_.momentum() == 0) {
-    for (int i = 0; i < net_params.size(); ++i) {
-      switch (Caffe::mode()) {
-      case Caffe::CPU:
-        caffe_scal(net_params[i]->count(), rate,
-            net_params[i]->mutable_cpu_diff());
-        break;
-      case Caffe::GPU:
-        caffe_gpu_scal(net_params[i]->count(), rate,
-            net_params[i]->mutable_gpu_diff());
-        break;
-      default:
-        LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+  Dtype momentum = this->param_.momentum();
+  Dtype weight_decay = this->param_.weight_decay();
+  switch (Caffe::mode()) {
+  case Caffe::CPU:
+    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
+      // Compute the value to history, and then copy them to the blob's diff.
+      caffe_axpby(net_params[param_id]->count(), rate,
+          net_params[param_id]->cpu_diff(), momentum,
+          history_[param_id]->mutable_cpu_data());
+      if (weight_decay) {
+        // add weight decay
+        caffe_axpy(net_params[param_id]->count(), weight_decay * rate,
+            net_params[param_id]->cpu_data(),
+            history_[param_id]->mutable_cpu_data());
       }
+      // copy
+      caffe_copy(net_params[param_id]->count(),
+          history_[param_id]->cpu_data(),
+          net_params[param_id]->mutable_cpu_diff());
     }
-  } else {
-    // Need to maintain momentum
-    for (int i = 0; i < net_params.size(); ++i) {
-      switch (Caffe::mode()) {
-      case Caffe::CPU:
-        caffe_axpby(net_params[i]->count(), rate,
-            net_params[i]->cpu_diff(), Dtype(this->param_.momentum()),
-            history_[i]->mutable_cpu_data());
-        caffe_copy(net_params[i]->count(), history_[i]->cpu_data(),
-            net_params[i]->mutable_cpu_diff());
-        break;
-      case Caffe::GPU:
-        caffe_gpu_axpby(net_params[i]->count(), rate,
-            net_params[i]->gpu_diff(), Dtype(this->param_.momentum()),
-            history_[i]->mutable_gpu_data());
-        caffe_gpu_copy(net_params[i]->count(), history_[i]->gpu_data(),
-            net_params[i]->mutable_gpu_diff());
-        break;
-      default:
-        LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+    break;
+  case Caffe::GPU:
+    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
+      // Compute the value to history, and then copy them to the blob's diff.
+      caffe_gpu_axpby(net_params[param_id]->count(), rate,
+          net_params[param_id]->gpu_diff(), momentum,
+          history_[param_id]->mutable_gpu_data());
+      if (weight_decay) {
+        // add weight decay
+        caffe_gpu_axpy(net_params[param_id]->count(), weight_decay * rate,
+            net_params[param_id]->gpu_data(),
+            history_[param_id]->mutable_gpu_data());
       }
+      // copy
+      caffe_gpu_copy(net_params[param_id]->count(),
+          history_[param_id]->gpu_data(),
+          net_params[param_id]->mutable_gpu_diff());
     }
+    break;
+  default:
+    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
   }
 }
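In scalar form, the rewritten ComputeUpdateValue() computes, for every parameter w with gradient g and history h, h <- momentum * h + rate * (g + weight_decay * w), and then writes h into the blob's diff for the net to subtract. A plain-array sketch of that arithmetic, assuming the BLAS-style semantics of the calls above (caffe_axpby: y = a*x + b*y; caffe_axpy: y += a*x; caffe_copy: y = x):

// Illustration only: the per-element arithmetic of the CPU branch above,
// with raw arrays standing in for blobs.
void SgdUpdateSketch(int n, float rate, float momentum, float weight_decay,
    const float* data, const float* diff_in, float* history,
    float* diff_out) {
  for (int i = 0; i < n; ++i) {
    // caffe_axpby: history = rate * diff + momentum * history
    history[i] = rate * diff_in[i] + momentum * history[i];
    // caffe_axpy: history += (weight_decay * rate) * data
    if (weight_decay) {
      history[i] += weight_decay * rate * data[i];
    }
    // caffe_copy: the diff handed back to the net is the history value
    diff_out[i] = history[i];
  }
}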
diff --git a/src/caffe/optimization/solver.hpp b/src/caffe/optimization/solver.hpp
index 0a78d88..f20a06e 100644
--- a/src/caffe/optimization/solver.hpp
+++ b/src/caffe/optimization/solver.hpp
@@ -12,6 +12,9 @@ class Solver {
   void Solve(Net<Dtype>* net);
 
  protected:
+  // PreSolve is run before any solving iteration starts, allowing one to
+  // put up some scaffold.
+  virtual void PreSolve() {};
   // Get the update value for the current iteration.
   virtual void ComputeUpdateValue() = 0;
   void Snapshot(bool is_final = false);
@@ -29,6 +32,7 @@ class SGDSolver : public Solver<Dtype> {
       : Solver<Dtype>(param) {}
 
  protected:
+  virtual void PreSolve();
   Dtype GetLearningRate();
   virtual void ComputeUpdateValue();
   // history maintains the historical momentum data.
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index eef6058..0231ad9 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -95,6 +95,7 @@ message SolverParameter {
   optional float gamma = 8; // The parameter to compute the learning rate.
   optional float power = 9; // The parameter to compute the learning rate.
   optional float momentum = 10; // The momentum value.
+  optional float weight_decay = 11; // The weight decay.
 
-  optional string snapshot_prefix = 11; // The prefix for the snapshot.
+  optional string snapshot_prefix = 12; // The prefix for the snapshot.
 }
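The PreSolve() hook declared in solver.hpp above runs once at the top of Solve(), after net_ is set but before any iteration, so stateful solvers can size per-parameter buffers against net_->params(). A hypothetical subclass, sketched only to show the override pattern (MySolver and its members are not part of the codebase):

// Illustration only: a solver using the new PreSolve() hook the same way
// SGDSolver sizes history_.
template <typename Dtype>
class MySolver : public Solver<Dtype> {
 public:
  explicit MySolver(const SolverParameter& param)
      : Solver<Dtype>(param) {}

 protected:
  virtual void PreSolve() {
    // Allocate per-parameter scratch state here; the net is already set.
  }
  virtual void ComputeUpdateValue() {
    // Fill each parameter blob's diff with the update to apply.
  }
};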
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index c540549..55a5b95 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -65,8 +65,8 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
     int check_bottom, int top_id, int top_data_id) {
   // First, figure out what blobs we need to check against.
   vector<Blob<Dtype>*> blobs_to_check;
-  for (int i = 0; i < layer.params().size(); ++i) {
-    blobs_to_check.push_back(layer.params()[i].get());
+  for (int i = 0; i < layer.blobs().size(); ++i) {
+    blobs_to_check.push_back(layer.blobs()[i].get());
   }
   if (check_bottom < 0) {
     for (int i = 0; i < bottom.size(); ++i) {
diff --git a/src/programs/convert_dataset.cpp b/src/programs/convert_dataset.cpp
index 53a1e29..3bf7794 100644
--- a/src/programs/convert_dataset.cpp
+++ b/src/programs/convert_dataset.cpp
@@ -57,8 +57,13 @@ int main(int argc, char** argv) {
   leveldb::WriteBatch* batch = new leveldb::WriteBatch();
   while (infile >> filename >> label) {
     ReadImageToDatum(root_folder + filename, label, &datum);
+    // sequential
     sprintf(key_cstr, "%08d_%s", count, filename.c_str());
     string key(key_cstr);
+    // random
+    // string key;
+    // GenerateRandomPrefix(8, &key);
+    // key += filename;
     string value;
     // get the value
     datum.SerializeToString(&value);
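GenerateRandomPrefix() appears only in the commented-out alternative above and is not defined anywhere in this patch. Since leveldb iterates keys in sorted order, a random prefix would shuffle the on-disk read order instead of preserving the sequential 8-digit numbering. A minimal sketch of what such a helper could look like (hypothetical; the signature is inferred from the call site):

#include <cstdlib>
#include <string>

// Illustration only: append `length` random hex characters to *key so that
// leveldb's sorted iteration visits entries in a shuffled order.
inline void GenerateRandomPrefix(const int length, std::string* key) {
  static const char kHexDigits[] = "0123456789abcdef";
  for (int i = 0; i < length; ++i) {
    key->push_back(kHexDigits[std::rand() % 16]);
  }
}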
diff --git a/src/programs/demo_mnist.cpp b/src/programs/demo_mnist.cpp
index 7c0937b..e5712a8 100644
--- a/src/programs/demo_mnist.cpp
+++ b/src/programs/demo_mnist.cpp
@@ -40,6 +40,7 @@ int main(int argc, char** argv) {
   solver_param.set_gamma(0.0001);
   solver_param.set_power(0.75);
   solver_param.set_momentum(0.9);
+  solver_param.set_weight_decay(0.0005);
 
   LOG(ERROR) << "Starting Optimization";
   SGDSolver<float> solver(solver_param);
diff --git a/src/programs/train_alexnet.cpp b/src/programs/train_alexnet.cpp
index c86a946..d6a4ca5 100644
--- a/src/programs/train_alexnet.cpp
+++ b/src/programs/train_alexnet.cpp
@@ -32,21 +32,15 @@ int main(int argc, char** argv) {
   LOG(ERROR) << "Performing Backward";
   LOG(ERROR) << "Initial loss: " << caffe_net.Backward();
 
-  // Run the network without training.
-  LOG(ERROR) << "Multiple Passes";
-  for (int i = 0; i < 100; ++i) {
-    caffe_net.ForwardBackward(bottom_vec);
-  }
-  LOG(ERROR) << "Multiple passes done.";
-/*
   SolverParameter solver_param;
-  solver_param.set_base_lr(0.01);
-  solver_param.set_display(0);
-  solver_param.set_max_iter(6000);
-  solver_param.set_lr_policy("inv");
-  solver_param.set_gamma(0.0001);
-  solver_param.set_power(0.75);
+  solver_param.set_base_lr(0.001);
+  solver_param.set_display(1);
+  solver_param.set_max_iter(600000);
+  solver_param.set_lr_policy("fixed");
+  //solver_param.set_gamma(0.0001);
+  //solver_param.set_power(0.75);
   solver_param.set_momentum(0.9);
+  solver_param.set_weight_decay(0.0005);
 
   LOG(ERROR) << "Starting Optimization";
   SGDSolver<float> solver(solver_param);
@@ -60,41 +54,5 @@ int main(int argc, char** argv) {
   float loss = caffe_net.Backward();
   LOG(ERROR) << "Final loss: " << loss;
 
-  NetParameter trained_net_param;
-  caffe_net.ToProto(&trained_net_param);
-
-  NetParameter traintest_net_param;
-  ReadProtoFromTextFile("caffe/test/data/lenet_traintest.prototxt",
-      &traintest_net_param);
-  Net<float> caffe_traintest_net(traintest_net_param, bottom_vec);
-  caffe_traintest_net.CopyTrainedLayersFrom(trained_net_param);
-
-  // Test run
-  double train_accuracy = 0;
-  int batch_size = traintest_net_param.layers(0).layer().batchsize();
-  for (int i = 0; i < 60000 / batch_size; ++i) {
-    const vector<Blob<float>*>& result =
-        caffe_traintest_net.Forward(bottom_vec);
-    train_accuracy += result[0]->cpu_data()[0];
-  }
-  train_accuracy /= 60000 / batch_size;
-  LOG(ERROR) << "Train accuracy:" << train_accuracy;
-
-  NetParameter test_net_param;
-  ReadProtoFromTextFile("caffe/test/data/lenet_test.prototxt", &test_net_param);
-  Net<float> caffe_test_net(test_net_param, bottom_vec);
-  caffe_test_net.CopyTrainedLayersFrom(trained_net_param);
-
-  // Test run
-  double test_accuracy = 0;
-  batch_size = test_net_param.layers(0).layer().batchsize();
-  for (int i = 0; i < 10000 / batch_size; ++i) {
-    const vector<Blob<float>*>& result =
-        caffe_test_net.Forward(bottom_vec);
-    test_accuracy += result[0]->cpu_data()[0];
-  }
-  test_accuracy /= 10000 / batch_size;
-  LOG(ERROR) << "Test accuracy:" << test_accuracy;
-*/
   return 0;
 }