include/caffe/net.hpp

   1 #ifndef CAFFE_NET_HPP_
   2 #define CAFFE_NET_HPP_
   3
   4 #include <map>
   5 #include <set>
   6 #include <string>
   7 #include <utility>
   8 #include <vector>
   9
  10 #include "caffe/blob.hpp"
  11 #include "caffe/common.hpp"
  12 #include "caffe/layer.hpp"
  13 #include "caffe/proto/caffe.pb.h"
  14
  15 namespace caffe {
  16
  17 /**
  18  * @brief Connects Layer%s together into a directed acyclic graph (DAG)
  19  *        specified by a NetParameter.
  20  *
  21  * TODO(dox): more thorough description.
      *
      * The interface declared here covers: construction from a NetParameter or
      * a parameter file, forward/backward passes over the whole net or a range
      * of layers, weight update/sharing/copying, serialization to a proto or an
      * HDF5 file, read-only accessors for the net's layers, blobs and
      * parameters, and registration of before/after forward/backward
      * callbacks. Most members are only declared here; their definitions are
      * not part of this header.
  22  */
  23 template <typename Dtype>
  24 class Net {
  25  public:
       /// @brief Constructs a net from an in-memory NetParameter.
  26   explicit Net(const NetParameter& param);
       /**
        * @brief Constructs a net from the parameter file @p param_file.
        *
        * @param phase  phase for the net (see phase()).
        * @param level  defaults to 0.
        * @param stages optional list of stage names; defaults to NULL.
        *
        * NOTE(review): level and stages presumably feed the phase/level/stage
        * filtering described on FilterNet() -- confirm in the implementation.
        */
  27   explicit Net(const string& param_file, Phase phase,
  28       const int level = 0, const vector<string>* stages = NULL);
       /// @brief Virtual destructor with an empty body.
  29   virtual ~Net() {}
  30
  31   /// @brief Initialize a network with a NetParameter.
  32   void Init(const NetParameter& param);
  33
  34   /**
  35    * @brief Run Forward and return the result.
  36    *
        * @param loss optional output pointer (defaults to NULL).
        *        ForwardBackward() passes the address of a local here and then
        *        returns that local, so a non-NULL pointer is expected to
        *        receive the loss.
  37    */
  38   const vector<Blob<Dtype>*>& Forward(Dtype* loss = NULL);
  39   /// @brief DEPRECATED; use Forward() instead.
  40   const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL) {
         // Deprecation warning, rate-limited through LOG_EVERY_N with N = 1000;
         // apart from the log message this simply forwards to Forward(loss).
  41     LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: ForwardPrefilled() "
  42         << "will be removed in a future version. Use Forward().";
  43     return Forward(loss);
  44   }
  45
  46   /**
  47    * The From and To variants of Forward and Backward operate on the
  48    * (topological) ordering by which the net is specified. For general DAG
  49    * networks, note that (1) computing from one layer to another might entail
  50    * extra computation on unrelated branches, and (2) computation starting in
  51    * the middle may be incorrect if all of the layers of a fan-in are not
  52    * included.
        *
        * NOTE(review): start and end are presumably layer indices into that
        * ordering; whether end is inclusive and what the returned Dtype holds
        * (presumably the loss) is not visible in this header -- confirm in the
        * implementation.
  53    */
  54   Dtype ForwardFromTo(int start, int end);
  55   Dtype ForwardFrom(int start);
  56   Dtype ForwardTo(int end);
  57   /// @brief DEPRECATED; set input blobs then use Forward() instead.
  58   const vector<Blob<Dtype>* > & Forward(const vector<Blob<Dtype>* > & bottom,
  59       Dtype* loss = NULL);
  60
  61   /**
  62    * @brief Zeroes out the diffs of all net parameters.
  63    *        Should be run before Backward.
  64    */
  65   void ClearParamDiffs();
  66
  67   /**
  68    * The network backward should take no input and output, since it solely
  69    * computes the gradient w.r.t the parameters, and the data has already been
  70    * provided during the forward pass.
  71    */
  72   void Backward();
  73   void BackwardFromTo(int start, int end);
  74   void BackwardFrom(int start);
  75   void BackwardTo(int end);
  76
  77   /**
  78    * @brief Reshape all layers from bottom to top.
  79    *
  80    * This is useful to propagate changes to layer sizes without running
  81    * a forward pass, e.g. to compute output feature size.
  82    */
  83   void Reshape();
  84
       /// @brief Runs Forward() followed by Backward() and returns the loss
       ///        that Forward() stored through its loss pointer.
  85   Dtype ForwardBackward() {
         // Intentionally not initialized here: the value returned below is
         // whatever Forward() writes through the pointer.
  86     Dtype loss;
  87     Forward(&loss);
  88     Backward();
  89     return loss;
  90   }
  91
  92   /// @brief Updates the network weights based on the diff values computed.
  93   void Update();
  94   /**
  95    * @brief Shares weight data of owner blobs with shared blobs.
  96    *
  97    * Note: this is called by Net::Init, and thus should normally not be
  98    * called manually.
  99    */
 100   void ShareWeights();
 101
 102   /**
 103    * @brief For an already initialized net, implicitly copies (i.e., using no
 104    *        additional memory) the pre-trained layers from another Net.
 105    */
 106   void ShareTrainedLayersWith(const Net* other);
 107   // For an already initialized net, CopyTrainedLayersFrom() copies the already
 108   // trained layers from another net parameter instance.
 109   /**
 110    * @brief For an already initialized net, copies the pre-trained layers from
 111    *        another Net.
 112    */
 113   void CopyTrainedLayersFrom(const NetParameter& param);
       /// File-based variants of CopyTrainedLayersFrom(); each takes the file
       /// name by value.
       /// NOTE(review): judging by the names, the BinaryProto/HDF5 variants
       /// read the respective format and the plain overload picks one of them;
       /// the selection logic is not visible in this header -- confirm.
 114   void CopyTrainedLayersFrom(const string trained_filename);
 115   void CopyTrainedLayersFromBinaryProto(const string trained_filename);
 116   void CopyTrainedLayersFromHDF5(const string trained_filename);
 117   /// @brief Writes the net to a proto.
 118   void ToProto(NetParameter* param, bool write_diff = false) const;
 119   /// @brief Writes the net to an HDF5 file.
 120   void ToHDF5(const string& filename, bool write_diff = false) const;
 121
 122   /// @brief returns the network name.
 123   inline const string& name() const { return name_; }
 124   /// @brief returns the layer names
 125   inline const vector<string>& layer_names() const { return layer_names_; }
 126   /// @brief returns the blob names
 127   inline const vector<string>& blob_names() const { return blob_names_; }
 128   /// @brief returns the blobs
 129   inline const vector<shared_ptr<Blob<Dtype> > >& blobs() const {
 130     return blobs_;
 131   }
 132   /// @brief returns the layers
 133   inline const vector<shared_ptr<Layer<Dtype> > >& layers() const {
 134     return layers_;
 135   }
 136   /// @brief returns the phase: TRAIN or TEST
 137   inline Phase phase() const { return phase_; }
 138   /**
 139    * @brief returns the bottom vecs for each layer -- usually you won't
 140    *        need this unless you do per-layer checks such as gradients.
 141    */
 142   inline const vector<vector<Blob<Dtype>*> >& bottom_vecs() const {
 143     return bottom_vecs_;
 144   }
 145   /**
 146    * @brief returns the top vecs for each layer -- usually you won't
 147    *        need this unless you do per-layer checks such as gradients.
 148    */
 149   inline const vector<vector<Blob<Dtype>*> >& top_vecs() const {
 150     return top_vecs_;
 151   }
 152   /// @brief returns the ids of the top blobs of layer i
 153   inline const vector<int> & top_ids(int i) const {
         // Both CHECKs guard the index: i must lie in [0, top_id_vecs_.size()).
 154     CHECK_GE(i, 0) << "Invalid layer id";
 155     CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id";
 156     return top_id_vecs_[i];
 157   }
 158   /// @brief returns the ids of the bottom blobs of layer i
 159   inline const vector<int> & bottom_ids(int i) const {
         // Both CHECKs guard the index: i must lie in
         // [0, bottom_id_vecs_.size()).
 160     CHECK_GE(i, 0) << "Invalid layer id";
 161     CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id";
 162     return bottom_id_vecs_[i];
 163   }
       /// @brief returns the stored need-backward flags, one vector<bool> per
       ///        entry of the outer vector (see bottom_need_backward_).
 164   inline const vector<vector<bool> >& bottom_need_backward() const {
 165     return bottom_need_backward_;
 166   }
       /// @brief returns the loss weight of each net blob, indexed by blob_id
 167   inline const vector<Dtype>& blob_loss_weights() const {
 168     return blob_loss_weights_;
 169   }
       /// @brief returns the stored layer_need_backward_ flags
 170   inline const vector<bool>& layer_need_backward() const {
 171     return layer_need_backward_;
 172   }
 173   /// @brief returns the parameters
 174   inline const vector<shared_ptr<Blob<Dtype> > >& params() const {
 175     return params_;
 176   }
       /// @brief returns the learnable parameters (non-owning pointers); see
       ///        learnable_param_ids_ for how they relate to params()
 177   inline const vector<Blob<Dtype>*>& learnable_params() const {
 178     return learnable_params_;
 179   }
 180   /// @brief returns the learnable parameter learning rate multipliers
 181   inline const vector<float>& params_lr() const { return params_lr_; }
       /// @brief returns has_params_lr_
       /// NOTE(review): presumably flags whether each params_lr() entry was
       /// explicitly specified -- confirm in the implementation.
 182   inline const vector<bool>& has_params_lr() const { return has_params_lr_; }
 183   /// @brief returns the learnable parameter decay multipliers
 184   inline const vector<float>& params_weight_decay() const {
 185     return params_weight_decay_;
 186   }
       /// @brief returns has_params_decay_
       /// NOTE(review): presumably flags whether each params_weight_decay()
       /// entry was explicitly specified -- confirm in the implementation.
 187   inline const vector<bool>& has_params_decay() const {
 188     return has_params_decay_;
 189   }
       /// @brief returns the string -> int map stored in param_names_index_
 190   const map<string, int>& param_names_index() const {
 191     return param_names_index_;
 192   }
       /// @brief returns param_owners_
 193   inline const vector<int>& param_owners() const { return param_owners_; }
       /// @brief returns param_display_names_
 194   inline const vector<string>& param_display_names() const {
 195     return param_display_names_;
 196   }
 197   /// @brief Input and output blob numbers
 198   inline int num_inputs() const { return net_input_blobs_.size(); }
 199   inline int num_outputs() const { return net_output_blobs_.size(); }
       /// @brief returns the net's input blobs (non-owning pointers)
 200   inline const vector<Blob<Dtype>*>& input_blobs() const {
 201     return net_input_blobs_;
 202   }
       /// @brief returns the net's output blobs (non-owning pointers)
 203   inline const vector<Blob<Dtype>*>& output_blobs() const {
 204     return net_output_blobs_;
 205   }
       /// @brief returns the blob indices for the input of the net
 206   inline const vector<int>& input_blob_indices() const {
 207     return net_input_blob_indices_;
 208   }
       /// @brief returns the blob indices for the output of the net
 209   inline const vector<int>& output_blob_indices() const {
 210     return net_output_blob_indices_;
 211   }
       /// Lookup helpers keyed by blob/layer name. They are defined out of
       /// line; what blob_by_name()/layer_by_name() return for an unknown name
       /// is not visible in this header.
 212   bool has_blob(const string& blob_name) const;
 213   const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name) const;
 214   bool has_layer(const string& layer_name) const;
 215   const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name) const;
 216
       /// @brief Sets debug_info_ (whether to compute and display debug info).
 217   void set_debug_info(const bool value) { debug_info_ = value; }
 218
 219   // Helpers for Init.
 220   /**
 221    * @brief Remove layers that the user specified should be excluded given the current
 222    *        phase, level, and stage.
 223    */
 224   static void FilterNet(const NetParameter& param,
 225       NetParameter* param_filtered);
 226   /// @brief return whether NetState state meets NetStateRule rule
 227   static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
 228       const string& layer_name);
 229
 230   // Invoked at specific points during an iteration
       // Abstract interface: run(int layer) is pure virtual and protected. The
       // templated friend declaration grants every Net<T> instantiation access
       // to it.
 231   class Callback {
 232    protected:
 233     virtual void run(int layer) = 0;
 234
 235     template <typename T>
 236     friend class Net;
 237   };
       // Callback registration. Each add_* method appends the raw pointer to
       // the matching list and each getter returns that list. Net's destructor
       // in this header has an empty body, so this class does not delete the
       // registered callbacks.
 238   const vector<Callback*>& before_forward() const { return before_forward_; }
 239   void add_before_forward(Callback* value) {
 240     before_forward_.push_back(value);
 241   }
 242   const vector<Callback*>& after_forward() const { return after_forward_; }
 243   void add_after_forward(Callback* value) {
 244     after_forward_.push_back(value);
 245   }
 246   const vector<Callback*>& before_backward() const { return before_backward_; }
 247   void add_before_backward(Callback* value) {
 248     before_backward_.push_back(value);
 249   }
 250   const vector<Callback*>& after_backward() const { return after_backward_; }
 251   void add_after_backward(Callback* value) {
 252     after_backward_.push_back(value);
 253   }
 254
 255  protected:
 256   // Helpers for Init.
 257   /// @brief Append a new top blob to the net.
 258   void AppendTop(const NetParameter& param, const int layer_id,
 259                  const int top_id, set<string>* available_blobs,
 260                  map<string, int>* blob_name_to_idx);
 261   /// @brief Append a new bottom blob to the net.
 262   int AppendBottom(const NetParameter& param, const int layer_id,
 263                    const int bottom_id, set<string>* available_blobs,
 264                    map<string, int>* blob_name_to_idx);
 265   /// @brief Append a new parameter blob to the net.
 266   void AppendParam(const NetParameter& param, const int layer_id,
 267                    const int param_id);
 268
 269   /// @brief Helper for displaying debug info in Forward.
 270   void ForwardDebugInfo(const int layer_id);
 271   /// @brief Helper for displaying debug info in Backward.
 272   void BackwardDebugInfo(const int layer_id);
 273   /// @brief Helper for displaying debug info in Update.
 274   void UpdateDebugInfo(const int param_id);
 275
 276   /// @brief The network name
 277   string name_;
 278   /// @brief The phase: TRAIN or TEST
 279   Phase phase_;
 280   /// @brief Individual layers in the net
 281   vector<shared_ptr<Layer<Dtype> > > layers_;
       /// Layer names, as returned by layer_names().
 282   vector<string> layer_names_;
       /// NOTE(review): presumably maps a layer name to its index in layers_
       /// (backing has_layer()/layer_by_name()) -- confirm.
 283   map<string, int> layer_names_index_;
       /// Flags returned by layer_need_backward().
 284   vector<bool> layer_need_backward_;
 285   /// @brief the blobs storing intermediate results between the layer.
 286   vector<shared_ptr<Blob<Dtype> > > blobs_;
       /// Blob names, as returned by blob_names().
 287   vector<string> blob_names_;
       /// NOTE(review): presumably maps a blob name to its index in blobs_
       /// (backing has_blob()/blob_by_name()) -- confirm.
 288   map<string, int> blob_names_index_;
 289   vector<bool> blob_need_backward_;
 290   /// bottom_vecs stores the vectors containing the input for each layer.
 291   /// They don't actually host the blobs (blobs_ does), so we simply store
 292   /// pointers.
 293   vector<vector<Blob<Dtype>*> > bottom_vecs_;
       /// Indexed by layer id in bottom_ids(), which returns one entry.
 294   vector<vector<int> > bottom_id_vecs_;
       /// Flags returned by bottom_need_backward().
 295   vector<vector<bool> > bottom_need_backward_;
 296   /// top_vecs stores the vectors containing the output for each layer
 297   vector<vector<Blob<Dtype>*> > top_vecs_;
       /// Indexed by layer id in top_ids(), which returns one entry.
 298   vector<vector<int> > top_id_vecs_;
 299   /// Vector of weight in the loss (or objective) function of each net blob,
 300   /// indexed by blob_id.
 301   vector<Dtype> blob_loss_weights_;
 302   vector<vector<int> > param_id_vecs_;
       /// Returned by param_owners().
 303   vector<int> param_owners_;
       /// Returned by param_display_names().
 304   vector<string> param_display_names_;
 305   vector<pair<int, int> > param_layer_indices_;
       /// Returned by param_names_index().
 306   map<string, int> param_names_index_;
 307   /// blob indices for the input and the output of the net
 308   vector<int> net_input_blob_indices_;
 309   vector<int> net_output_blob_indices_;
       /// Non-owning pointers to the net's input and output blobs; their sizes
       /// are what num_inputs() and num_outputs() report.
 310   vector<Blob<Dtype>*> net_input_blobs_;
 311   vector<Blob<Dtype>*> net_output_blobs_;
 312   /// The parameters in the network.
 313   vector<shared_ptr<Blob<Dtype> > > params_;
       /// Non-owning pointers returned by learnable_params().
 314   vector<Blob<Dtype>*> learnable_params_;
 315   /**
 316    * The mapping from params_ -> learnable_params_: we have
 317    * learnable_param_ids_.size() == params_.size(),
 318    * and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
 319    * if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer
 320    * and learnable_params_[learnable_param_ids_[i]] gives its owner.
 321    */
 322   vector<int> learnable_param_ids_;
 323   /// the learning rate multipliers for learnable_params_
 324   vector<float> params_lr_;
 325   vector<bool> has_params_lr_;
 326   /// the weight decay multipliers for learnable_params_
 327   vector<float> params_weight_decay_;
 328   vector<bool> has_params_decay_;
 329   /// The bytes of memory used by this net
 330   size_t memory_used_;
 331   /// Whether to compute and display debug info for the net.
 332   bool debug_info_;
 333   // Callbacks
       // Filled by the add_before_*/add_after_* methods above.
 334   vector<Callback*> before_forward_;
 335   vector<Callback*> after_forward_;
 336   vector<Callback*> before_backward_;
 337   vector<Callback*> after_backward_;
 338
     // NOTE(review): macro defined outside this file; by its name it disables
     // copy construction and assignment of Net -- confirm its definition.
 339 DISABLE_COPY_AND_ASSIGN(Net);
 340 };
 341
 342
 343 }  // namespace caffe
 344
 345 #endif  // CAFFE_NET_HPP_