include/caffe/layer.hpp

   1 #ifndef CAFFE_LAYER_H_
   2 #define CAFFE_LAYER_H_
   3
   4 #include <algorithm>
   5 #include <string>
   6 #include <vector>
   7
   8 #include "caffe/blob.hpp"
   9 #include "caffe/common.hpp"
  10 #include "caffe/proto/caffe.pb.h"
  11 #include "caffe/util/device_alternate.hpp"
  12
  13 namespace caffe {
  14
  15 /**
  16  * @brief An interface for the units of computation which can be composed into a
  17  *        Net.
  18  *
  19  * Layer&s must implement a Forward function, in which they take their input
  20  * (bottom) Blob&s (if any) and compute their output Blob&s (if any).
  21  * They may also implement a Backward function, in which they compute the error
  22  * gradients with respect to their input Blob&s, given the error gradients with
  23  * their output Blob&s.
  24  */
  25 template <typename Dtype>
  26 class Layer {
  27  public:
  28   /**
  29    * You should not implement your own constructor. Any set up code should go
  30    * to SetUp(), where the dimensions of the bottom blobs are provided to the
  31    * layer.
  32    */
  33   explicit Layer(const LayerParameter& param)
  34     : layer_param_(param) {
  35       // The only thing we do is to copy blobs if there are any.
  36       if (layer_param_.blobs_size() > 0) {
  37         blobs_.resize(layer_param_.blobs_size());
  38         for (int i = 0; i < layer_param_.blobs_size(); ++i) {
  39           blobs_[i].reset(new Blob<Dtype>());
  40           blobs_[i]->FromProto(layer_param_.blobs(i));
  41         }
  42       }
  43     }
  44   virtual ~Layer() {}
  45
  46   /**
  47    * @brief Implements common layer setup functionality.
  48    *
  49    * @param bottom the preshaped input blobs
  50    * @param top
  51    *     the allocated but unshaped output blobs, to be shaped by Reshape
  52    *
  53    * Checks that the number of bottom and top blobs is correct.
  54    * Calls LayerSetUp to do special layer setup for individual layer types,
  55    * followed by Reshape to set up sizes of top blobs and internal buffers.
  56    * Sets up the loss weight multiplier blobs for any non-zero loss weights.
  57    * This method may not be overridden.
  58    */
  59   void SetUp(const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  60     CheckBlobCounts(bottom, *top);
  61     LayerSetUp(bottom, top);
  62     Reshape(bottom, top);
  63     SetLossWeights(top);
  64   }
  65
  66   /**
  67    * @brief Does layer-specific setup: your layer should implement this function
  68    *        as well as Reshape.
  69    *
  70    * @param bottom
  71    *     the preshaped input blobs, whose data fields store the input data for
  72    *     this layer
  73    * @param top
  74    *     the allocated but unshaped output blobs
  75    *
  76    * This method should do one-time layer specific setup. This includes reading
  77    * and processing relevent parameters from the <code>layer_param_</code>.
  78    * Setting up the shapes of top blobs and internal buffers should be done in
  79    * <code>Reshape</code>, which will be called before the forward pass to
  80    * adjust the top blob sizes.
  81    */
  82   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
  83       vector<Blob<Dtype>*>* top) {}
  84
  85   /**
  86    * @brief Adjust the shapes of top blobs and internal buffers to accomodate
  87    *        the shapes of the bottom blobs.
  88    *
  89    * @param bottom the input blobs, with the requested input shapes
  90    * @param top the top blobs, which should be reshaped as needed
  91    *
  92    * This method should reshape top blobs as needed according to the shapes
  93    * of the bottom (input) blobs, as well as reshaping any internal buffers
  94    * and making any other necessary adjustments so that the layer can
  95    * accomodate the bottom blobs.
  96    */
  97   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
  98       vector<Blob<Dtype>*>* top) = 0;
  99
 100   /**
 101    * @brief Given the bottom blobs, compute the top blobs and the loss.
 102    *
 103    * @param bottom
 104    *     the input blobs, whose data fields store the input data for this layer
 105    * @param top
 106    *     the preshaped output blobs, whose data fields will store this layers'
 107    *     outputs
 108    * \return The total loss from the layer.
 109    *
 110    * The Forward wrapper calls the relevant device wrapper function
 111    * (Forward_cpu or Forward_gpu) to compute the top blob values given the
 112    * bottom blobs.  If the layer has any non-zero loss_weights, the wrapper
 113    * then computes and returns the loss.
 114    *
 115    * Your layer should implement Forward_cpu and (optionally) Forward_gpu.
 116    */
 117   inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
 118       vector<Blob<Dtype>*>* top);
 119
 120   /**
 121    * @brief Given the top blob error gradients, compute the bottom blob error
 122    *        gradients.
 123    *
 124    * @param top
 125    *     the output blobs, whose diff fields store the gradient of the error
 126    *     with respect to themselves
 127    * @param propagate_down
 128    *     a vector with equal length to bottom, with each index indicating
 129    *     whether to propagate the error gradients down to the bottom blob at
 130    *     the corresponding index
 131    * @param bottom
 132    *     the input blobs, whose diff fields will store the gradient of the error
 133    *     with respect to themselves after Backward is run
 134    *
 135    * The Backward wrapper calls the relevant device wrapper function
 136    * (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the
 137    * top blob diffs.
 138    *
 139    * Your layer should implement Forward_cpu and (optionally) Forward_gpu.
 140    */
 141   inline void Backward(const vector<Blob<Dtype>*>& top,
 142       const vector<bool>& propagate_down,
 143       vector<Blob<Dtype>*>* bottom);
 144
 145   /**
 146    * @brief Returns the vector of learnable parameter blobs.
 147    */
 148   vector<shared_ptr<Blob<Dtype> > >& blobs() {
 149     return blobs_;
 150   }
 151
 152   /**
 153    * @brief Returns the layer parameter.
 154    */
 155   const LayerParameter& layer_param() const { return layer_param_; }
 156
 157   /**
 158    * @brief Writes the layer parameter to a protocol buffer
 159    */
 160   virtual void ToProto(LayerParameter* param, bool write_diff = false);
 161
 162   /**
 163    * @brief Returns the scalar loss associated with a top blob at a given index.
 164    */
 165   inline Dtype loss(const int top_index) const {
 166     return (loss_.size() > top_index) ? loss_[top_index] : Dtype(0);
 167   }
 168
 169   /**
 170    * @brief Sets the loss associated with a top blob at a given index.
 171    */
 172   inline void set_loss(const int top_index, const Dtype value) {
 173     if (loss_.size() <= top_index) {
 174       loss_.resize(top_index + 1, Dtype(0));
 175     }
 176     loss_[top_index] = value;
 177   }
 178
 179   /**
 180    * @brief Returns the layer type as an enum value.
 181    */
 182   virtual inline LayerParameter_LayerType type() const {
 183     return LayerParameter_LayerType_NONE;
 184   }
 185
 186   /**
 187    * @brief Returns the layer type name.
 188    */
 189   virtual inline const string& type_name() const {
 190     return LayerParameter_LayerType_Name(type());
 191   }
 192
 193   /**
 194    * @brief Returns the exact number of bottom blobs required by the layer,
 195    *        or -1 if no exact number is required.
 196    *
 197    * This method should be overridden to return a non-negative value if your
 198    * layer expects some exact number of bottom blobs.
 199    */
 200   virtual inline int ExactNumBottomBlobs() const { return -1; }
 201   /**
 202    * @brief Returns the minimum number of bottom blobs required by the layer,
 203    *        or -1 if no minimum number is required.
 204    *
 205    * This method should be overridden to return a non-negative value if your
 206    * layer expects some minimum number of bottom blobs.
 207    */
 208   virtual inline int MinBottomBlobs() const { return -1; }
 209   /**
 210    * @brief Returns the maximum number of bottom blobs required by the layer,
 211    *        or -1 if no maximum number is required.
 212    *
 213    * This method should be overridden to return a non-negative value if your
 214    * layer expects some maximum number of bottom blobs.
 215    */
 216   virtual inline int MaxBottomBlobs() const { return -1; }
 217   /**
 218    * @brief Returns the exact number of top blobs required by the layer,
 219    *        or -1 if no exact number is required.
 220    *
 221    * This method should be overridden to return a non-negative value if your
 222    * layer expects some exact number of top blobs.
 223    */
 224   virtual inline int ExactNumTopBlobs() const { return -1; }
 225   /**
 226    * @brief Returns the minimum number of top blobs required by the layer,
 227    *        or -1 if no minimum number is required.
 228    *
 229    * This method should be overridden to return a non-negative value if your
 230    * layer expects some minimum number of top blobs.
 231    */
 232   virtual inline int MinTopBlobs() const { return -1; }
 233   /**
 234    * @brief Returns the maximum number of top blobs required by the layer,
 235    *        or -1 if no maximum number is required.
 236    *
 237    * This method should be overridden to return a non-negative value if your
 238    * layer expects some maximum number of top blobs.
 239    */
 240   virtual inline int MaxTopBlobs() const { return -1; }
 241   /**
 242    * @brief Returns true if the layer requires an equal number of bottom and
 243    *        top blobs.
 244    *
 245    * This method should be overridden to return true if your layer expects an
 246    * equal number of bottom and top blobs.
 247    */
 248   virtual inline bool EqualNumBottomTopBlobs() const { return false; }
 249
 250   /**
 251    * @brief Return whether "anonymous" top blobs are created automatically
 252    *        by the layer.
 253    *
 254    * If this method returns true, Net::Init will create enough "anonymous" top
 255    * blobs to fulfill the requirement specified by ExactNumTopBlobs() or
 256    * MinTopBlobs().
 257    */
 258   virtual inline bool AutoTopBlobs() const { return false; }
 259
 260   /**
 261    * @brief Return whether to allow force_backward for a given bottom blob
 262    *        index.
 263    *
 264    * If AllowForceBackward(i) == false, we will ignore the force_backward
 265    * setting and backpropagate to blob i only if it needs gradient information
 266    * (as is done when force_backward == false).
 267    */
 268   virtual inline bool AllowForceBackward(const int bottom_index) const {
 269     return true;
 270   }
 271
 272   /**
 273    * @brief Specifies whether the layer should compute gradients w.r.t. a
 274    *        parameter at a particular index given by param_id.
 275    *
 276    * You can safely ignore false values and always compute gradients
 277    * for all parameters, but possibly with wasteful computation.
 278    */
 279   inline bool param_propagate_down(const int param_id) {
 280     return (param_propagate_down_.size() > param_id) ?
 281         param_propagate_down_[param_id] : false;
 282   }
 283   /**
 284    * @brief Sets whether the layer should compute gradients w.r.t. a
 285    *        parameter at a particular index given by param_id.
 286    */
 287   inline void set_param_propagate_down(const int param_id, const bool value) {
 288     if (param_propagate_down_.size() <= param_id) {
 289       param_propagate_down_.resize(param_id + 1, true);
 290     }
 291     param_propagate_down_[param_id] = value;
 292   }
 293
 294
 295  protected:
 296   /** The protobuf that stores the layer parameters */
 297   LayerParameter layer_param_;
 298   /** The vector that stores the learnable parameters as a set of blobs. */
 299   vector<shared_ptr<Blob<Dtype> > > blobs_;
 300   /** Vector indicating whether to compute the diff of each param blob. */
 301   vector<bool> param_propagate_down_;
 302
 303   /** The vector that indicates whether each top blob has a non-zero weight in
 304    *  the objective function. */
 305   vector<Dtype> loss_;
 306
 307   /** @brief Using the CPU device, compute the layer output. */
 308   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 309       vector<Blob<Dtype>*>* top) = 0;
 310   /**
 311    * @brief Using the GPU device, compute the layer output.
 312    *        Fall back to Forward_cpu() if unavailable.
 313    */
 314   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 315       vector<Blob<Dtype>*>* top) {
 316     // LOG(WARNING) << "Using CPU code as backup.";
 317     return Forward_cpu(bottom, top);
 318   }
 319
 320   /**
 321    * @brief Using the CPU device, compute the gradients for any parameters and
 322    *        for the bottom blobs if propagate_down is true.
 323    */
 324   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
 325       const vector<bool>& propagate_down,
 326       vector<Blob<Dtype>*>* bottom) = 0;
 327   /**
 328    * @brief Using the GPU device, compute the gradients for any parameters and
 329    *        for the bottom blobs if propagate_down is true.
 330    *        Fall back to Backward_cpu() if unavailable.
 331    */
 332   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
 333       const vector<bool>& propagate_down,
 334       vector<Blob<Dtype>*>* bottom) {
 335     // LOG(WARNING) << "Using CPU code as backup.";
 336     Backward_cpu(top, propagate_down, bottom);
 337   }
 338
 339   /**
 340    * Called by the parent Layer's SetUp to check that the number of bottom
 341    * and top Blobs provided as input match the expected numbers specified by
 342    * the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
 343    */
 344   virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom,
 345                                const vector<Blob<Dtype>*>& top) {
 346     if (ExactNumBottomBlobs() >= 0) {
 347       CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
 348           << type_name() << " Layer takes " << ExactNumBottomBlobs()
 349           << " bottom blob(s) as input.";
 350     }
 351     if (MinBottomBlobs() >= 0) {
 352       CHECK_LE(MinBottomBlobs(), bottom.size())
 353           << type_name() << " Layer takes at least " << MinBottomBlobs()
 354           << " bottom blob(s) as input.";
 355     }
 356     if (MaxBottomBlobs() >= 0) {
 357       CHECK_GE(MaxBottomBlobs(), bottom.size())
 358           << type_name() << " Layer takes at most " << MaxBottomBlobs()
 359           << " bottom blob(s) as input.";
 360     }
 361     if (ExactNumTopBlobs() >= 0) {
 362       CHECK_EQ(ExactNumTopBlobs(), top.size())
 363           << type_name() << " Layer produces " << ExactNumTopBlobs()
 364           << " top blob(s) as output.";
 365     }
 366     if (MinTopBlobs() >= 0) {
 367       CHECK_LE(MinTopBlobs(), top.size())
 368           << type_name() << " Layer produces at least " << MinTopBlobs()
 369           << " top blob(s) as output.";
 370     }
 371     if (MaxTopBlobs() >= 0) {
 372       CHECK_GE(MaxTopBlobs(), top.size())
 373           << type_name() << " Layer produces at most " << MaxTopBlobs()
 374           << " top blob(s) as output.";
 375     }
 376     if (EqualNumBottomTopBlobs()) {
 377       CHECK_EQ(bottom.size(), top.size())
 378           << type_name() << " Layer produces one top blob as output for each "
 379           << "bottom blob input.";
 380     }
 381   }
 382
 383   /**
 384    * Called by SetUp to initialize the weights associated with any top blobs in
 385    * the loss function. Store non-zero loss weights in the diff blob.
 386    */
 387   inline void SetLossWeights(vector<Blob<Dtype>*>* top) {
 388     const int num_loss_weights = layer_param_.loss_weight_size();
 389     if (num_loss_weights) {
 390       CHECK_EQ(top->size(), num_loss_weights) << "loss_weight must be "
 391           "unspecified or specified once per top blob.";
 392       for (int top_id = 0; top_id < top->size(); ++top_id) {
 393         const Dtype loss_weight = layer_param_.loss_weight(top_id);
 394         if (loss_weight == Dtype(0)) { continue; }
 395         this->set_loss(top_id, loss_weight);
 396         const int count = (*top)[top_id]->count();
 397         Dtype* loss_multiplier = (*top)[top_id]->mutable_cpu_diff();
 398         caffe_set(count, loss_weight, loss_multiplier);
 399       }
 400     }
 401   }
 402
 403   DISABLE_COPY_AND_ASSIGN(Layer);
 404 };  // class Layer
 405
 406 // Forward and backward wrappers. You should implement the cpu and
 407 // gpu specific implementations instead, and should not change these
 408 // functions.
 409 template <typename Dtype>
 410 inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
 411     vector<Blob<Dtype>*>* top) {
 412   Dtype loss = 0;
 413   switch (Caffe::mode()) {
 414   case Caffe::CPU:
 415     Forward_cpu(bottom, top);
 416     for (int top_id = 0; top_id < top->size(); ++top_id) {
 417       if (!this->loss(top_id)) { continue; }
 418       const int count = (*top)[top_id]->count();
 419       const Dtype* data = (*top)[top_id]->cpu_data();
 420       const Dtype* loss_weights = (*top)[top_id]->cpu_diff();
 421       loss += caffe_cpu_dot(count, data, loss_weights);
 422     }
 423     break;
 424   case Caffe::GPU:
 425     Forward_gpu(bottom, top);
 426 #ifndef CPU_ONLY
 427     for (int top_id = 0; top_id < top->size(); ++top_id) {
 428       if (!this->loss(top_id)) { continue; }
 429       const int count = (*top)[top_id]->count();
 430       const Dtype* data = (*top)[top_id]->gpu_data();
 431       const Dtype* loss_weights = (*top)[top_id]->gpu_diff();
 432       Dtype blob_loss = 0;
 433       caffe_gpu_dot(count, data, loss_weights, &blob_loss);
 434       loss += blob_loss;
 435     }
 436 #endif
 437     break;
 438   default:
 439     LOG(FATAL) << "Unknown caffe mode.";
 440   }
 441   return loss;
 442 }
 443
 444 template <typename Dtype>
 445 inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
 446     const vector<bool>& propagate_down,
 447     vector<Blob<Dtype>*>* bottom) {
 448   switch (Caffe::mode()) {
 449   case Caffe::CPU:
 450     Backward_cpu(top, propagate_down, bottom);
 451     break;
 452   case Caffe::GPU:
 453     Backward_gpu(top, propagate_down, bottom);
 454     break;
 455   default:
 456     LOG(FATAL) << "Unknown caffe mode.";
 457   }
 458 }
 459
 460 // Serialize LayerParameter to protocol buffer
 461 template <typename Dtype>
 462 void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
 463   param->Clear();
 464   param->CopyFrom(layer_param_);
 465   param->clear_blobs();
 466   for (int i = 0; i < blobs_.size(); ++i) {
 467     blobs_[i]->ToProto(param->add_blobs(), write_diff);
 468   }
 469 }
 470
 471 // The layer factory function
 472 template <typename Dtype>
 473 Layer<Dtype>* GetLayer(const LayerParameter& param);
 474
 475 }  // namespace caffe
 476
 477 #endif  // CAFFE_LAYER_H_