From 22f45e67ab9e9fc9bbc3f3f5bde3841e58ec3c19 Mon Sep 17 00:00:00 2001
From: Yangqing Jia
Date: Thu, 24 Oct 2013 22:36:00 -0700
Subject: [PATCH] pushing missing checkout

---
 src/caffe/common.cpp |  4 +--
 src/caffe/net.cpp    | 71 +++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp
index c254d70..1fce86a 100644
--- a/src/caffe/common.cpp
+++ b/src/caffe/common.cpp
@@ -36,13 +36,13 @@ Caffe::Caffe()
   // Try to create a curand handler.
   if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
       != CURAND_STATUS_SUCCESS ||
-      curandSetPseudoRandomGeneratorSeed(curand_generator_, time(NULL))
+      curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL)
       != CURAND_STATUS_SUCCESS) {
     LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
   }
   // Try to create a vsl stream. This should almost always work, but we will
   // check it anyway.
-  if (vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, time(NULL)) != VSL_STATUS_OK) {
+  if (vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701) != VSL_STATUS_OK) {
     LOG(ERROR) << "Cannot create vsl stream. VSL random number generator "
         << "won't be available.";
   }
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index e1442ec..38a806d 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -34,6 +34,7 @@ Net<Dtype>::Net(const NetParameter& param,
         bottom[i]->height(), bottom[i]->width()));
     blobs_.push_back(blob_pointer);
     blob_names_.push_back(blob_name);
+    blob_need_backward_.push_back(false);
     net_input_blob_indices_.push_back(i);
     blob_name_to_idx[blob_name] = i;
     available_blobs.insert(blob_name);
@@ -49,17 +50,21 @@ Net<Dtype>::Net(const NetParameter& param,
     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
     layer_names_.push_back(layer_param.name());
     LOG(INFO) << "Creating Layer " << layer_param.name();
+    bool need_backward = false;
     // Figure out this layer's input and output
     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
       const string& blob_name = layer_connection.bottom(j);
+      const int blob_id = blob_name_to_idx[blob_name];
       if (available_blobs.find(blob_name) == available_blobs.end()) {
         LOG(FATAL) << "Unknown blob input " << blob_name <<
             " to layer" << j;
       }
       LOG(INFO) << layer_param.name() << " <- " << blob_name;
       bottom_vecs_[i].push_back(
-          blobs_[blob_name_to_idx[blob_name]].get());
-      bottom_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
+          blobs_[blob_id].get());
+      bottom_id_vecs_[i].push_back(blob_id);
+      // If a blob needs backward, this layer should provide it.
+      need_backward |= blob_need_backward_[blob_id];
       available_blobs.erase(blob_name);
     }
     for (int j = 0; j < layer_connection.top_size(); ++j) {
@@ -83,12 +88,35 @@ Net<Dtype>::Net(const NetParameter& param,
         shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
         blobs_.push_back(blob_pointer);
         blob_names_.push_back(blob_name);
+        blob_need_backward_.push_back(false);
         blob_name_to_idx[blob_name] = blob_names_.size() - 1;
         available_blobs.insert(blob_name);
         top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
         top_id_vecs_[i].push_back(blob_names_.size() - 1);
       }
     }
+    // After this layer is connected, set it up.
+    // LOG(INFO) << "Setting up " << layer_names_[i];
+    layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
+    for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
+      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
+          << top_vecs_[i][topid]->height() << " "
+          << top_vecs_[i][topid]->width();
+    }
+    // Check if this layer needs backward operation itself
+    for (int j = 0; j < layers_[i]->layer_param().blobs_lr_size(); ++j) {
+      need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
+    }
+    // Finally, set the backward flag
+    layer_need_backward_.push_back(need_backward);
+    if (need_backward) {
+      LOG(INFO) << layer_names_[i] << " needs backward computation.";
+      for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
+        blob_need_backward_[top_id_vecs_[i][j]] = true;
+      }
+    } else {
+      LOG(INFO) << layer_names_[i] << " does not need backward computation.";
+    }
   }
   // In the end, all remaining blobs are considered output blobs.
   for (set<string>::iterator it = available_blobs.begin();
@@ -97,11 +125,15 @@ Net<Dtype>::Net(const NetParameter& param,
     net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
     net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
   }
+  GetLearningRateAndWeightDecay();
+  LOG(INFO) << "Network initialization done.";
+}
+
 
-  LOG(INFO) << "Setting up the layers.";
+template <typename Dtype>
+void Net<Dtype>::GetLearningRateAndWeightDecay() {
+  LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
   for (int i = 0; i < layers_.size(); ++i) {
-    LOG(INFO) << "Setting up " << layer_names_[i];
-    layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
     vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
     for (int j = 0; j < layer_blobs.size(); ++j) {
       params_.push_back(layer_blobs[j]);
@@ -111,7 +143,7 @@ Net<Dtype>::Net(const NetParameter& param,
       CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
       for (int j = 0; j < layer_blobs.size(); ++j) {
         float local_lr = layers_[i]->layer_param().blobs_lr(j);
-        CHECK_GT(local_lr, 0.);
+        CHECK_GE(local_lr, 0.);
         params_lr_.push_back(local_lr);
       }
     } else {
@@ -119,13 +151,21 @@ Net<Dtype>::Net(const NetParameter& param,
         params_lr_.push_back(1.);
       }
     }
-    for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
-      LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
-          << top_vecs_[i][topid]->height() << " "
-          << top_vecs_[i][topid]->width();
+    // push the weight decay multipliers
+    if (layers_[i]->layer_param().weight_decay_size()) {
+      CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
+          layer_blobs.size());
+      for (int j = 0; j < layer_blobs.size(); ++j) {
+        float local_decay = layers_[i]->layer_param().weight_decay(j);
+        CHECK_GE(local_decay, 0.);
+        params_weight_decay_.push_back(local_decay);
+      }
+    } else {
+      for (int j = 0; j < layer_blobs.size(); ++j) {
+        params_weight_decay_.push_back(1.);
+      }
     }
   }
-  LOG(INFO) << "Network initialization done.";
 }
 
 template <typename Dtype>
@@ -145,11 +185,12 @@ const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
 template <typename Dtype>
 Dtype Net<Dtype>::Backward() {
   Dtype loss = 0;
-  // TODO(Yangqing): figure out those layers that do not need backward.
   for (int i = layers_.size() - 1; i >= 0; --i) {
-    Dtype layer_loss = layers_[i]->Backward(
-        top_vecs_[i], true, &bottom_vecs_[i]);
-    loss += layer_loss;
+    if (layer_need_backward_[i]) {
+      Dtype layer_loss = layers_[i]->Backward(
+          top_vecs_[i], true, &bottom_vecs_[i]);
+      loss += layer_loss;
+    }
  }
   return loss;
 }
-- 
2.7.4
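Note on the net.cpp change: the patch makes Net track, per blob and per layer, whether backward computation is required, so that Backward() can skip layers that neither sit downstream of a learnable layer nor have learnable parameters themselves (blobs_lr > 0). Below is a minimal, self-contained sketch of that bookkeeping in isolation; LayerSpec and the blob/layer containers are illustrative stand-ins for this note, not Caffe's actual types.

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Illustrative stand-in for a layer's connectivity: the names of its
    // bottom (input) and top (output) blobs, plus whether it has learnable
    // parameters with a positive learning rate.
    struct LayerSpec {
      std::string name;
      std::vector<std::string> bottoms;
      std::vector<std::string> tops;
      bool has_learnable_params;
    };

    int main() {
      // A tiny chain: data -> conv1 -> loss.
      std::vector<LayerSpec> layers = {
          {"data", {}, {"data_blob"}, false},
          {"conv1", {"data_blob"}, {"conv1_blob"}, true},
          {"loss", {"conv1_blob"}, {"loss_blob"}, false},
      };

      std::map<std::string, bool> blob_need_backward;
      std::vector<bool> layer_need_backward;

      // Walk the layers in construction order, mirroring the flags the
      // patch sets up in the Net constructor.
      for (const LayerSpec& layer : layers) {
        // A layer needs backward if any bottom blob already carries a
        // gradient requirement, or if it has learnable parameters itself.
        bool need_backward = layer.has_learnable_params;
        for (const std::string& b : layer.bottoms) {
          need_backward = need_backward || blob_need_backward[b];
        }
        layer_need_backward.push_back(need_backward);
        // If this layer runs backward, its top blobs carry gradients, so
        // every downstream layer consuming them must run backward too.
        for (const std::string& t : layer.tops) {
          blob_need_backward[t] = need_backward;
        }
        std::printf("%s %s backward computation.\n", layer.name.c_str(),
                    need_backward ? "needs" : "does not need");
      }
      // Backward() would then walk the layers in reverse and skip every
      // index i with layer_need_backward[i] == false.
      return 0;
    }

With this toy graph, only "data" is reported as not needing backward, which is the same per-layer decision the patch logs during network setup.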