// returns the parameters
inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
// returns the parameter learning rate multipliers
- inline vector<float>& params_lr() {return params_lr_; }
+ inline vector<float>& params_lr() { return params_lr_; }
inline vector<float>& params_weight_decay() { return params_weight_decay_; }
+ const map<string, int>& param_names_index() { return param_names_index_; }
// Input and output blob numbers
inline int num_inputs() { return net_input_blobs_.size(); }
inline int num_outputs() { return net_output_blobs_.size(); }
const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name);
bool has_layer(const string& layer_name);
const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name);
- const map<string, int>& param_names_index() { return param_names_index_; }
+
+ void set_debug_info(const bool value) { debug_info_ = value; }
protected:
// Helpers for Init.
map<string, int>* blob_name_to_idx);
void AppendParam(const NetParameter& param, const int layer_id,
const int param_id);
+
+ // Helpers for displaying debug info.
+ void ForwardDebugInfo(const int layer_id);
+ void BackwardDebugInfo(const int layer_id);
+ void UpdateDebugInfo(const int param_id);
+
// Function to get misc parameters, e.g. the learning rate multiplier and
// weight decay.
void GetLearningRateAndWeightDecay();
vector<vector<Blob<Dtype>*> > top_vecs_;
vector<vector<int> > top_id_vecs_;
vector<int> param_owners_;
- vector<pair<int, int> > layer_param_indices_;
+ vector<string> param_display_names_;
+ vector<pair<int, int> > param_layer_indices_;
map<string, int> param_names_index_;
// blob indices for the input and the output of the net
vector<int> net_input_blob_indices_;
vector<float> params_weight_decay_;
// The bytes of memory used by this net
size_t memory_used_;
+ // Whether to compute and display debug info for the net.
+ bool debug_info_;
+
DISABLE_COPY_AND_ASSIGN(Net);
};
}
}
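
// Hedged usage sketch (not part of the header above): set_debug_info() is the
// only public switch added here. The helper name EnableNetDebugInfo and the
// assumption that `net` points at a fully initialized Net<float> are
// illustrative; only set_debug_info(true) comes from the declaration above.
#include "caffe/net.hpp"

void EnableNetDebugInfo(caffe::Net<float>* net) {
  // Once enabled, Forward(), Backward(), and Update() route through the
  // ForwardDebugInfo/BackwardDebugInfo/UpdateDebugInfo helpers declared in
  // the protected section above and log per-blob L1 statistics.
  net->set_debug_info(true);
}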
+template <> unsigned int Blob<unsigned int>::asum_data() const {
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+template <> int Blob<int>::asum_data() const {
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+template <typename Dtype>
+Dtype Blob<Dtype>::asum_data() const {
+ if (!data_) { return 0; }
+ switch (data_->head()) {
+ case SyncedMemory::HEAD_AT_CPU:
+ return caffe_cpu_asum(count_, cpu_data());
+ case SyncedMemory::HEAD_AT_GPU:
+ case SyncedMemory::SYNCED:
+#ifndef CPU_ONLY
+ {
+ Dtype asum;
+ caffe_gpu_asum(count_, gpu_data(), &asum);
+ return asum;
+ }
+#else
+ NO_GPU;
+#endif
+ case SyncedMemory::UNINITIALIZED:
+ return 0;
+ default:
+ LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
+ }
+ return 0;
+}
+
+template <> unsigned int Blob<unsigned int>::asum_diff() const {
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+template <> int Blob<int>::asum_diff() const {
+ NOT_IMPLEMENTED;
+ return 0;
+}
+
+template <typename Dtype>
+Dtype Blob<Dtype>::asum_diff() const {
+ if (!diff_) { return 0; }
+ switch (diff_->head()) {
+ case SyncedMemory::HEAD_AT_CPU:
+ return caffe_cpu_asum(count_, cpu_diff());
+ case SyncedMemory::HEAD_AT_GPU:
+ case SyncedMemory::SYNCED:
+#ifndef CPU_ONLY
+ {
+ Dtype asum;
+ caffe_gpu_asum(count_, gpu_diff(), &asum);
+ return asum;
+ }
+#else
+ NO_GPU;
+#endif
+ case SyncedMemory::UNINITIALIZED:
+ return 0;
+ default:
+ LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
+ }
+ return 0;
+}
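
// Hedged sketch: asum_data() and asum_diff() return the L1 norm (sum of
// absolute values) of a blob's data or diff, so dividing by count() yields the
// mean absolute value that the Net debug helpers below report alongside the
// raw sum. The function name MeanAbsData is hypothetical; count() and
// asum_data() are the accessors defined above.
#include "caffe/blob.hpp"

template <typename Dtype>
Dtype MeanAbsData(const caffe::Blob<Dtype>& blob) {
  // Guard against an empty blob to avoid dividing by zero.
  return blob.count() ? blob.asum_data() / blob.count() : Dtype(0);
}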
+
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
if (num_ != source.num() || channels_ != source.channels() ||
GetLearningRateAndWeightDecay();
LOG(INFO) << "Network initialization done.";
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
+ // Don't display debug info by default.
+ debug_info_ = false;
}
// Helper for Net::Init: add a new input or top blob to the net. (Inputs have
const int param_id) {
const LayerParameter& layer_param = layers_[layer_id]->layer_param();
const int param_size = layer_param.param_size();
- string param_name;
- if (param_size) {
- param_name = layer_param.param(param_id);
+ string param_name = param_size ? layer_param.param(param_id) : "";
+ if (param_name.size()) {
+ param_display_names_.push_back(param_name);
+ } else {
+ ostringstream param_display_name;
+ param_display_name << param_id;
+ param_display_names_.push_back(param_display_name.str());
}
const int net_param_id = params_.size();
params_.push_back(layers_[layer_id]->blobs()[param_id]);
- layer_param_indices_.push_back(make_pair(layer_id, param_id));
+ param_layer_indices_.push_back(make_pair(layer_id, param_id));
if (!param_size || !param_name.size() || (param_name.size() &&
param_names_index_.find(param_name) == param_names_index_.end())) {
// This layer "owns" this parameter blob -- it is either anonymous
const int owner_net_param_id = param_names_index_[param_name];
param_owners_.push_back(owner_net_param_id);
const pair<int, int>& owner_index =
- layer_param_indices_[owner_net_param_id];
+ param_layer_indices_[owner_net_param_id];
const int owner_layer_id = owner_index.first;
const int owner_param_id = owner_index.second;
LOG(INFO) << "Sharing parameters '" << param_name << "' owned by "
// LOG(ERROR) << "Forwarding " << layer_names_[i];
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
loss += layer_loss;
+ if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
if (layer_need_backward_[i]) {
layers_[i]->Backward(
top_vecs_[i], bottom_need_backward_[i], &bottom_vecs_[i]);
+ if (debug_info_) { BackwardDebugInfo(i); }
}
}
}
template <typename Dtype>
+void Net<Dtype>::ForwardDebugInfo(const int layer_id) {
+ for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
+ const Blob<Dtype>& blob = *top_vecs_[layer_id][top_id];
+ const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
+ const Dtype asum = blob.asum_data();
+ const Dtype asum_mean = asum / blob.count();
+ LOG(INFO) << " [Forward] "
+ << "Layer " << layer_names_[layer_id] << ", top blob " << blob_name
+ << " data: " << asum << " (" << asum_mean << ")";
+ }
+}
+
+template <typename Dtype>
+void Net<Dtype>::BackwardDebugInfo(const int layer_id) {
+ const vector<Blob<Dtype>*>& bottom_vec = bottom_vecs_[layer_id];
+ for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) {
+ if (!bottom_need_backward_[layer_id][bottom_id]) { continue; }
+ const Blob<Dtype>& blob = *bottom_vec[bottom_id];
+ const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
+ const Dtype asum = blob.asum_diff();
+ const Dtype asum_mean = asum / blob.count();
+ LOG(INFO) << " [Backward] "
+ << "Layer " << layer_names_[layer_id] << ", bottom blob " << blob_name
+ << " diff: " << asum << " (" << asum_mean << ")";
+ }
+ for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
+ ++param_id) {
+ if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; }
+ const Blob<Dtype>& blob = *layers_[layer_id]->blobs()[param_id];
+ const Dtype asum = blob.asum_diff();
+ const Dtype asum_mean = asum / blob.count();
+ LOG(INFO) << " [Backward] "
+ << "Layer " << layer_names_[layer_id] << ", param blob " << param_id
+ << " diff: " << asum << " (" << asum_mean << ")";
+ }
+}
+
+template <typename Dtype>
+void Net<Dtype>::UpdateDebugInfo(const int param_id) {
+ const Blob<Dtype>& blob = *params_[param_id];
+ const int param_owner = param_owners_[param_id];
+ const string& layer_name = layer_names_[param_layer_indices_[param_id].first];
+ const string& param_display_name = param_display_names_[param_id];
+ const Dtype asum_diff = blob.asum_diff();
+ const Dtype asum_diff_mean = asum_diff / blob.count();
+ if (param_owner < 0) {
+ const Dtype asum_data = blob.asum_data();
+ const Dtype asum_data_mean = asum_data / blob.count();
+ LOG(INFO) << " [Update] Layer " << layer_name
+ << ", param " << param_display_name
+ << " data: " << asum_data << " (" << asum_data_mean << ");"
+ << " diff: " << asum_diff << " (" << asum_diff_mean << ")";
+ } else {
+ const string& owner_layer_name =
+ layer_names_[param_layer_indices_[param_owner].first];
+ LOG(INFO) << " [Update] Layer " << layer_name
+ << ", param blob " << param_display_name
+ << " (owned by layer " << owner_layer_name << ", "
+ << "param " << param_display_names_[param_owners_[param_id]] << ")"
+ << " diff: " << asum_diff << " (" << asum_diff_mean << ")";
+ }
+}
+
+template <typename Dtype>
void Net<Dtype>::ShareTrainedLayersWith(Net* other) {
int num_source_layers = other->layers().size();
for (int i = 0; i < num_source_layers; ++i) {
// diff. (Assumes that the learning rate, weight decay, etc. have already been
// accounted for in the current diff.)
for (int i = 0; i < params_.size(); ++i) {
- if (param_owners_[i] < 0) {
- continue;
- }
+ if (param_owners_[i] < 0) { continue; }
+ if (debug_info_) { UpdateDebugInfo(i); }
const int count = params_[i]->count();
const Dtype* this_diff;
Dtype* owner_diff;
owner_diff = params_[param_owners_[i]]->mutable_gpu_diff();
caffe_gpu_add(count, this_diff, owner_diff, owner_diff);
break;
+#else
+ NO_GPU;
#endif
default:
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
}
// Now, update the owned parameters.
for (int i = 0; i < params_.size(); ++i) {
- if (param_owners_[i] < 0) {
- params_[i]->Update();
- }
+ if (param_owners_[i] >= 0) { continue; }
+ if (debug_info_) { UpdateDebugInfo(i); }
+ params_[i]->Update();
}
}
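
// Hedged sketch of the ownership convention used by Update() above:
// param_owners_[i] < 0 marks net parameter i as owned by its layer, while a
// non-negative value is the net-level index of the parameter whose blob it
// shares. Shared parameters first accumulate their diff into the owner; only
// owners then call Blob::Update(). With debug_info_ set, each parameter is
// therefore logged exactly once per Update() call, shared parameters in the
// accumulation loop and owners in the update loop. The helper name
// IsSharedParam is hypothetical.
#include <vector>

inline bool IsSharedParam(const std::vector<int>& param_owners, int i) {
  return param_owners[i] >= 0;
}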