From: Jeff Donahue
Date: Sat, 15 Feb 2014 19:53:24 +0000 (-0800)
Subject: add SplitLayer and Net::AddSplits to transform shared bottom blobs into
X-Git-Tag: submit/tizen/20180823.020014~766^2~18
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ba09df7250bc2042209358b9169e01e61c46850a;p=platform%2Fupstream%2Fcaffeonacl.git

add SplitLayer and Net::AddSplits to transform shared bottom blobs into split
layers
---

diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 684d6c5..cee1d38 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -28,6 +28,9 @@ class Net {
 
   // Initialize a network with the network parameter.
   void Init(const NetParameter& param);
+  // Copy NetParameters with SplitLayers added to replace any shared bottom
+  // blobs with unique bottom blobs provided by the SplitLayer.
+  void AddSplits(const NetParameter& param, NetParameter* param_split);
 
   // Run forward with the input blobs already fed separately. You can get the
   // input blobs using input_blobs().
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 82e52cd..4db2556 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -109,6 +109,27 @@ class DropoutLayer : public NeuronLayer<Dtype> {
 
 
 template <typename Dtype>
+class SplitLayer : public Layer<Dtype> {
+ public:
+  explicit SplitLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  int count_;
+};
+
+
+template <typename Dtype>
 class FlattenLayer : public Layer<Dtype> {
  public:
   explicit FlattenLayer(const LayerParameter& param)
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index b62ba38..48d6edf 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -53,6 +53,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new SoftmaxLayer<Dtype>(param);
   } else if (type == "softmax_loss") {
     return new SoftmaxWithLossLayer<Dtype>(param);
+  } else if (type == "split") {
+    return new SplitLayer<Dtype>(param);
   } else if (type == "multinomial_logistic_loss") {
     return new MultinomialLogisticLossLayer<Dtype>(param);
   } else {
diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp
new file mode 100644
index 0000000..0fbd6d9
--- /dev/null
+++ b/src/caffe/layers/split_layer.cpp
@@ -0,0 +1,73 @@
+// Copyright 2014 Jeff Donahue
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "Split Layer takes a single blob as input.";
+  CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output.";
+  count_ = bottom[0]->count();
+  for (int i = 0; i < top->size(); ++i) {
+    (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+        bottom[0]->height(), bottom[0]->width());
+    CHECK_EQ(count_, (*top)[i]->count());
+  }
+}
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    Dtype* top_data = (*top)[i]->mutable_cpu_data();
+    caffe_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    Dtype* top_data = (*top)[i]->mutable_gpu_data();
+    caffe_gpu_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->cpu_diff();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+  caffe_copy(count_, top_diff, bottom_diff);
+  for (int i = 1; i < top.size(); ++i) {
+    top_diff = top[i]->cpu_diff();
+    caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+  }
+  return Dtype(0.);
+}
+
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+  caffe_gpu_copy(count_, top_diff, bottom_diff);
+  for (int i = 1; i < top.size(); ++i) {
+    top_diff = top[i]->gpu_diff();
+    caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+  }
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(SplitLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index f265cd3..3c4148f 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -1,5 +1,6 @@
 // Copyright Yangqing Jia 2013
 
+#include <map>
 #include <set>
 #include <string>
 #include <vector>
@@ -29,7 +30,10 @@ Net<Dtype>::Net(const string& param_file) {
 }
 
 template <typename Dtype>
-void Net<Dtype>::Init(const NetParameter& param) {
+void Net<Dtype>::Init(const NetParameter& in_param) {
+  // Create a copy of in_param with splits added where necessary.
+  NetParameter param;
+  AddSplits(in_param, &param);
   // Basically, build all the layers and set up its connections.
   name_ = param.name();
   map<string, int> blob_name_to_idx;
@@ -154,6 +158,72 @@
 
 template <typename Dtype>
+void Net<Dtype>::AddSplits(const NetParameter& param,
+    NetParameter* param_split) {
+  // Initialize by copying from the input NetParameter.
+  param_split->CopyFrom(param);
+  param_split->clear_layers();
+  map<string, int> blob_name_to_bottom_count;
+  map<string, int> blob_name_to_bottom_split_idx;
+  // Determine for each top blob the number of times it's used as a bottom blob.
+  for (int i = 0; i < param.layers_size(); ++i) {
+    const LayerConnection& layer_connection = param.layers(i);
+    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
+      const string& blob_name = layer_connection.bottom(j);
+      blob_name_to_bottom_count[blob_name]++;
+    }
+    for (int j = 0; j < layer_connection.top_size(); ++j) {
+      const string& blob_name = layer_connection.top(j);
+      blob_name_to_bottom_count[blob_name] = 0;
+      blob_name_to_bottom_split_idx[blob_name] = 0;
+    }
+  }
+  for (int i = 0; i < param.layers_size(); ++i) {
+    LayerConnection* layer_connection = param_split->add_layers();
+    layer_connection->CopyFrom(param.layers(i));
+    // Replace any shared bottom blobs with split layer outputs.
+    for (int j = 0; j < layer_connection->bottom_size(); ++j) {
+      const string& blob_name = layer_connection->bottom(j);
+      const int split_count = blob_name_to_bottom_count[blob_name];
+      if (split_count > 1) {
+        const int suffix_max_length = 16;
+        char split_suffix[suffix_max_length];
+        const int suffix_length = snprintf(split_suffix, suffix_max_length,
+            "_split_%d", blob_name_to_bottom_split_idx[blob_name]++);
+        CHECK_LT(suffix_length, suffix_max_length);
+        const string& split_blob_name = blob_name + split_suffix;
+        layer_connection->set_bottom(j, split_blob_name);
+      }
+    }
+    // Create split blob for any top blobs used by other layers as bottom
+    // blobs more than once.
+    for (int j = 0; j < layer_connection->top_size(); ++j) {
+      const string& blob_name = layer_connection->top(j);
+      const int split_count = blob_name_to_bottom_count[blob_name];
+      if (split_count > 1) {
+        LayerConnection* split_layer_connection = param_split->add_layers();
+        split_layer_connection->add_bottom(blob_name);
+        LayerParameter* split_layer_param =
+            split_layer_connection->mutable_layer();
+        split_layer_param->set_name(blob_name + "_split");
+        split_layer_param->set_type("split");
+        vector<Blob<Dtype>*> split_top_blobs(split_count);
+        for (int k = 0; k < split_count; ++k) {
+          const int suffix_max_length = 16;
+          char split_suffix[suffix_max_length];
+          const int suffix_length = snprintf(split_suffix, suffix_max_length,
+              "_split_%d", k);
+          CHECK_LT(suffix_length, suffix_max_length);
+          const string& split_blob_name = blob_name + split_suffix;
+          split_layer_connection->add_top(split_blob_name);
+        }
+      }
+    }
+  }
+}
+
+
+template <typename Dtype>
 void Net<Dtype>::GetLearningRateAndWeightDecay() {
   LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
   for (int i = 0; i < layers_.size(); ++i) {
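
The following is an illustration only, not part of the patch: assuming the
pre-V1 LayerConnection prototxt format used by this tree and hypothetical
layer names and types, a net in which two layers share the bottom blob "data"

    # before AddSplits: "data" is consumed by both ip1 and ip2
    layers {
      layer { name: "data" type: "data" }
      top: "data"
    }
    layers {
      layer { name: "ip1" type: "innerproduct" }
      bottom: "data"
      top: "ip1"
    }
    layers {
      layer { name: "ip2" type: "innerproduct" }
      bottom: "data"
      top: "ip2"
    }

would be rewritten by Net::AddSplits into roughly

    # after AddSplits: a split layer gives each consumer its own bottom blob
    layers {
      layer { name: "data" type: "data" }
      top: "data"
    }
    layers {
      layer { name: "data_split" type: "split" }
      bottom: "data"
      top: "data_split_0"
      top: "data_split_1"
    }
    layers {
      layer { name: "ip1" type: "innerproduct" }
      bottom: "data_split_0"
      top: "ip1"
    }
    layers {
      layer { name: "ip2" type: "innerproduct" }
      bottom: "data_split_1"
      top: "ip2"
    }

The split layer is appended immediately after the layer that produces the
shared blob, and the "_split_%d" suffix index follows the order in which the
consuming bottoms appear in the NetParameter.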
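On the backward pass, SplitLayer accumulates rather than overwrites: the first
top's diff is copied into the shared bottom's diff and each remaining top's
diff is added via axpy, so the bottom diff is the elementwise sum of all top
diffs. As a worked example with illustrative values: for count_ = 2 and two
tops whose diffs are [1, 2] and [3, 4], Backward_cpu leaves [4, 6] in the
bottom diff and returns Dtype(0.) as the loss contribution.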