add SplitLayer and Net::AddSplits to transform shared bottom blobs into
split layers

author	Jeff Donahue <jeff.donahue@gmail.com>
	Sat, 15 Feb 2014 19:53:24 +0000 (11:53 -0800)
committer	Jeff Donahue <jeff.donahue@gmail.com>
	Sat, 15 Feb 2014 23:04:42 +0000 (15:04 -0800)

include/caffe/net.hpp
include/caffe/vision_layers.hpp
src/caffe/layer_factory.cpp
src/caffe/layers/split_layer.cpp [new file with mode: 0644]
src/caffe/net.cpp

index 684d6c5..cee1d38 100644 (file)
@@ -28,6 +28,9 @@ class Net {
 
   // Initialize a network with the network parameter.
   void Init(const NetParameter& param);
+  // Copy the NetParameter into param_split, adding SplitLayers to replace
+  // any shared bottom blob with unique bottom blobs provided by a SplitLayer.
+  void AddSplits(const NetParameter& param, NetParameter* param_split);
 
   // Run forward with the input blobs already fed separately. You can get the
   // input blobs using input_blobs().
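A quick sketch of how Init consumes the new method (it mirrors the net.cpp hunk below; names are those of this diff):

    // Inside Net<Dtype>::Init(const NetParameter& in_param), sketched:
    NetParameter param;
    AddSplits(in_param, &param);  // rewrite shared bottoms into split tops
    // Every blob in 'param' is now consumed as a bottom at most once, so no
    // two layers ever write their diffs into the same blob on backward.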
index 82e52cd..4db2556 100644 (file)
@@ -109,6 +109,27 @@ class DropoutLayer : public NeuronLayer<Dtype> {
 
 
 template <typename Dtype>
+class SplitLayer : public Layer<Dtype> {
+ public:
+  explicit SplitLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  int count_;
+};
+
+
+template <typename Dtype>
 class FlattenLayer : public Layer<Dtype> {
  public:
   explicit FlattenLayer(const LayerParameter& param)
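A minimal usage sketch of the declared interface, assuming the Blob, LayerParameter, and vector conventions of this tree: one bottom, two tops, with SetUp shaping each top to match the bottom.

    Blob<float> bottom_blob(2, 3, 4, 5);
    Blob<float> top_a, top_b;
    vector<Blob<float>*> bottom(1, &bottom_blob);
    vector<Blob<float>*> top;
    top.push_back(&top_a);
    top.push_back(&top_b);
    LayerParameter layer_param;
    SplitLayer<float> layer(layer_param);
    layer.SetUp(bottom, &top);
    // top_a.count() == top_b.count() == bottom_blob.count() == 120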
index b62ba38..48d6edf 100644 (file)
@@ -53,6 +53,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new SoftmaxLayer<Dtype>(param);
   } else if (type == "softmax_loss") {
     return new SoftmaxWithLossLayer<Dtype>(param);
+  } else if (type == "split") {
+    return new SplitLayer<Dtype>(param);
   } else if (type == "multinomial_logistic_loss") {
     return new MultinomialLogisticLossLayer<Dtype>(param);
   } else {
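With the new branch in place, a layer can be requested by its string type; a hedged sketch (GetLayer's declaration is assumed visible, as it is to net.cpp in this tree):

    LayerParameter layer_param;
    layer_param.set_name("fc8_split");
    layer_param.set_type("split");
    Layer<float>* layer = GetLayer<float>(layer_param);  // a SplitLayer<float>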
diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp
new file mode 100644 (file)
index 0000000..0fbd6d9
--- /dev/null
@@ -0,0 +1,73 @@
+// Copyright 2014 Jeff Donahue
+
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "SplitLayer takes a single blob as input.";
+  CHECK_GE(top->size(), 1) << "SplitLayer takes at least one blob as output.";
+  count_ = bottom[0]->count();
+  for (int i = 0; i < top->size(); ++i) {
+    (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
+                       bottom[0]->height(), bottom[0]->width());
+    CHECK_EQ(count_, (*top)[i]->count());
+  }
+}
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    Dtype* top_data = (*top)[i]->mutable_cpu_data();
+    caffe_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+void SplitLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  for (int i = 0; i < top->size(); ++i) {
+    Dtype* top_data = (*top)[i]->mutable_gpu_data();
+    caffe_gpu_copy(count_, bottom_data, top_data);
+  }
+}
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->cpu_diff();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+  caffe_copy(count_, top_diff, bottom_diff);
+  for (int i = 1; i < top.size(); ++i) {
+    top_diff = top[i]->cpu_diff();
+    caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+  }
+  return Dtype(0.);
+}
+
+
+template <typename Dtype>
+Dtype SplitLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+  caffe_gpu_copy(count_, top_diff, bottom_diff);
+  for (int i = 1; i < top.size(); ++i) {
+    top_diff = top[i]->gpu_diff();
+    caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff);
+  }
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(SplitLayer);
+
+}  // namespace caffe
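Since every top receives a verbatim copy of the bottom on the forward pass, the backward pass must sum the top diffs into the bottom diff. A self-contained numeric sketch of that accumulation, using plain arrays rather than Caffe types:

    #include <cstdio>

    int main() {
      const int count = 4;
      const float top0_diff[count] = {0.1f, 0.2f, 0.3f, 0.4f};
      const float top1_diff[count] = {1.0f, 1.0f, 1.0f, 1.0f};
      float bottom_diff[count];
      for (int i = 0; i < count; ++i) {
        // First top: plain copy (caffe_copy above); each further top is
        // accumulated with alpha = 1, as caffe_axpy does.
        bottom_diff[i] = top0_diff[i] + top1_diff[i];
      }
      for (int i = 0; i < count; ++i) {
        printf("%g ", bottom_diff[i]);  // prints: 1.1 1.2 1.3 1.4
      }
      printf("\n");
      return 0;
    }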
index f265cd3..3c4148f 100644 (file)
@@ -1,5 +1,6 @@
 // Copyright Yangqing Jia 2013
 
+#include <fstream>
 #include <map>
 #include <set>
 #include <string>
@@ -29,7 +30,10 @@ Net<Dtype>::Net(const string& param_file) {
 }
 
 template <typename Dtype>
-void Net<Dtype>::Init(const NetParameter& param) {
+void Net<Dtype>::Init(const NetParameter& in_param) {
+  // Create a copy of in_param with splits added where necessary.
+  NetParameter param;
+  AddSplits(in_param, &param);
   // Basically, build all the layers and set up its connections.
   name_ = param.name();
   map<string, int> blob_name_to_idx;
@@ -154,6 +158,72 @@ void Net<Dtype>::Init(const NetParameter& param) {
 
 
 template <typename Dtype>
+void Net<Dtype>::AddSplits(const NetParameter& param,
+    NetParameter* param_split) {
+  // Initialize by copying from the input NetParameter.
+  param_split->CopyFrom(param);
+  param_split->clear_layers();
+  map<string, int> blob_name_to_bottom_count;
+  map<string, int> blob_name_to_bottom_split_idx;
+  // Determine for each top blob the number of times it's used as a bottom blob.
+  for (int i = 0; i < param.layers_size(); ++i) {
+    const LayerConnection& layer_connection = param.layers(i);
+    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
+      const string& blob_name = layer_connection.bottom(j);
+      blob_name_to_bottom_count[blob_name]++;
+    }
+    for (int j = 0; j < layer_connection.top_size(); ++j) {
+      const string& blob_name = layer_connection.top(j);
+      blob_name_to_bottom_count[blob_name] = 0;
+      blob_name_to_bottom_split_idx[blob_name] = 0;
+    }
+  }
+  for (int i = 0; i < param.layers_size(); ++i) {
+    LayerConnection* layer_connection = param_split->add_layers();
+    layer_connection->CopyFrom(param.layers(i));
+    // Replace any shared bottom blobs with split layer outputs.
+    for (int j = 0; j < layer_connection->bottom_size(); ++j) {
+      const string& blob_name = layer_connection->bottom(j);
+      const int split_count = blob_name_to_bottom_count[blob_name];
+      if (split_count > 1) {
+        const int suffix_max_length = 16;
+        char split_suffix[suffix_max_length];
+        const int suffix_length = snprintf(split_suffix, suffix_max_length,
+            "_split_%d", blob_name_to_bottom_split_idx[blob_name]++);
+        CHECK_LT(suffix_length, suffix_max_length);
+        const string& split_blob_name = blob_name + split_suffix;
+        layer_connection->set_bottom(j, split_blob_name);
+      }
+    }
+    // Create a split layer for any top blob consumed as a bottom blob by
+    // more than one layer.
+    for (int j = 0; j < layer_connection->top_size(); ++j) {
+      const string& blob_name = layer_connection->top(j);
+      const int split_count = blob_name_to_bottom_count[blob_name];
+      if (split_count > 1) {
+        LayerConnection* split_layer_connection = param_split->add_layers();
+        split_layer_connection->add_bottom(blob_name);
+        LayerParameter* split_layer_param =
+            split_layer_connection->mutable_layer();
+        split_layer_param->set_name(blob_name + "_split");
+        split_layer_param->set_type("split");
+        // Name each of the split_count top blobs "<blob_name>_split_<k>".
+        for (int k = 0; k < split_count; ++k) {
+          const int suffix_max_length = 16;
+          char split_suffix[suffix_max_length];
+          const int suffix_length = snprintf(split_suffix, suffix_max_length,
+              "_split_%d", k);
+          CHECK_LT(suffix_length, suffix_max_length);
+          const string& split_blob_name = blob_name + split_suffix;
+          split_layer_connection->add_top(split_blob_name);
+        }
+      }
+    }
+  }
+}
+
+
+template <typename Dtype>
 void Net<Dtype>::GetLearningRateAndWeightDecay() {
   LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
   for (int i = 0; i < layers_.size(); ++i) {
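Stepping back to the AddSplits hunk above: the suffix scheme is easiest to see end to end. A self-contained sketch of the names generated for a blob consumed by two layers (blob and layer names are illustrative):

    #include <cstdio>
    #include <string>

    int main() {
      const std::string blob_name = "fc8";  // consumed by, say, loss and accuracy
      const int split_count = 2;            // blob_name_to_bottom_count["fc8"]
      // The inserted layer is named "fc8_split"; its tops replace the two
      // consumers' bottom entries in encounter order.
      printf("split layer: %s\n", (blob_name + "_split").c_str());
      for (int k = 0; k < split_count; ++k) {
        char split_suffix[16];
        snprintf(split_suffix, sizeof(split_suffix), "_split_%d", k);
        printf("top %d: %s\n", k, (blob_name + split_suffix).c_str());
      }
      return 0;
      // prints: split layer: fc8_split / top 0: fc8_split_0 / top 1: fc8_split_1
    }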