include/caffe/filler.hpp

   1 // Fillers are random number generators that fill a blob using the specified
   2 // algorithm. The expectation is that they are only going to be used during
   3 // initialization time and will not involve any GPUs.
   4
   5 #ifndef CAFFE_FILLER_HPP
   6 #define CAFFE_FILLER_HPP
   7
   8 #include <string>
   9
  10 #include "caffe/blob.hpp"
  11 #include "caffe/common.hpp"
  12 #include "caffe/proto/caffe.pb.h"
  13 #include "caffe/syncedmem.hpp"
  14 #include "caffe/util/math_functions.hpp"
  15
  16 namespace caffe {
  17
  18 /// @brief Fills a Blob with constant or randomly-generated data.
  19 template <typename Dtype>
  20 class Filler {
  21  public:
  22   explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  23   virtual ~Filler() {}
  24   virtual void Fill(Blob<Dtype>* blob) = 0;
  25  protected:
  26   FillerParameter filler_param_;
  27 };  // class Filler
  28
  29
  30 /// @brief Fills a Blob with constant values @f$ x = 0 @f$.
  31 template <typename Dtype>
  32 class ConstantFiller : public Filler<Dtype> {
  33  public:
  34   explicit ConstantFiller(const FillerParameter& param)
  35       : Filler<Dtype>(param) {}
  36   virtual void Fill(Blob<Dtype>* blob) {
  37     Dtype* data = blob->mutable_cpu_data();
  38     const int count = blob->count();
  39     const Dtype value = this->filler_param_.value();
  40     CHECK(count);
  41     for (int i = 0; i < count; ++i) {
  42       data[i] = value;
  43     }
  44     CHECK_EQ(this->filler_param_.sparse(), -1)
  45          << "Sparsity not supported by this Filler.";
  46   }
  47 };
  48
  49 /// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$.
  50 template <typename Dtype>
  51 class UniformFiller : public Filler<Dtype> {
  52  public:
  53   explicit UniformFiller(const FillerParameter& param)
  54       : Filler<Dtype>(param) {}
  55   virtual void Fill(Blob<Dtype>* blob) {
  56     CHECK(blob->count());
  57     caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
  58         Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
  59     CHECK_EQ(this->filler_param_.sparse(), -1)
  60          << "Sparsity not supported by this Filler.";
  61   }
  62 };
  63
  64 /// @brief Fills a Blob with Gaussian-distributed values @f$ x = a @f$.
  65 template <typename Dtype>
  66 class GaussianFiller : public Filler<Dtype> {
  67  public:
  68   explicit GaussianFiller(const FillerParameter& param)
  69       : Filler<Dtype>(param) {}
  70   virtual void Fill(Blob<Dtype>* blob) {
  71     Dtype* data = blob->mutable_cpu_data();
  72     CHECK(blob->count());
  73     caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
  74         Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
  75     int sparse = this->filler_param_.sparse();
  76     CHECK_GE(sparse, -1);
  77     if (sparse >= 0) {
  78       // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
  79       // These have num == channels == 1; width is number of inputs; height is
  80       // number of outputs.  The 'sparse' variable specifies the mean number
  81       // of non-zero input weights for a given output.
  82       CHECK_EQ(blob->num(), 1);
  83       CHECK_EQ(blob->channels(), 1);
  84       int num_outputs = blob->height();
  85       Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
  86       rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
  87       int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
  88       caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
  89       for (int i = 0; i < blob->count(); ++i) {
  90         data[i] *= mask[i];
  91       }
  92     }
  93   }
  94
  95  protected:
  96   shared_ptr<SyncedMemory> rand_vec_;
  97 };
  98
  99 /** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 100  *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 101  */
 102 template <typename Dtype>
 103 class PositiveUnitballFiller : public Filler<Dtype> {
 104  public:
 105   explicit PositiveUnitballFiller(const FillerParameter& param)
 106       : Filler<Dtype>(param) {}
 107   virtual void Fill(Blob<Dtype>* blob) {
 108     Dtype* data = blob->mutable_cpu_data();
 109     DCHECK(blob->count());
 110     caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
 111     // We expect the filler to not be called very frequently, so we will
 112     // just use a simple implementation
 113     int dim = blob->count() / blob->num();
 114     CHECK(dim);
 115     for (int i = 0; i < blob->num(); ++i) {
 116       Dtype sum = 0;
 117       for (int j = 0; j < dim; ++j) {
 118         sum += data[i * dim + j];
 119       }
 120       for (int j = 0; j < dim; ++j) {
 121         data[i * dim + j] /= sum;
 122       }
 123     }
 124     CHECK_EQ(this->filler_param_.sparse(), -1)
 125          << "Sparsity not supported by this Filler.";
 126   }
 127 };
 128
 129 /**
 130  * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$
 131  *        is set inversely proportional to the number of incoming nodes.
 132  *
 133  * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
 134  * the difficulty of training deep feedforward neuralnetworks, but does not
 135  * use the fan_out value.
 136  *
 137  * It fills the incoming matrix by randomly sampling uniform data from
 138  * [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
 139  * of input nodes. You should make sure the input blob has shape (num, a, b, c)
 140  * where a * b * c = fan_in.
 141  *
 142  * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 143  */
 144 template <typename Dtype>
 145 class XavierFiller : public Filler<Dtype> {
 146  public:
 147   explicit XavierFiller(const FillerParameter& param)
 148       : Filler<Dtype>(param) {}
 149   virtual void Fill(Blob<Dtype>* blob) {
 150     CHECK(blob->count());
 151     int fan_in = blob->count() / blob->num();
 152     Dtype scale = sqrt(Dtype(3) / fan_in);
 153     caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
 154         blob->mutable_cpu_data());
 155     CHECK_EQ(this->filler_param_.sparse(), -1)
 156          << "Sparsity not supported by this Filler.";
 157   }
 158 };
 159
 160
 161 /**
 162  * @brief Get a specific filler from the specification given in FillerParameter.
 163  *
 164  * Ideally this would be replaced by a factory pattern, but we will leave it
 165  * this way for now.
 166  */
 167 template <typename Dtype>
 168 Filler<Dtype>* GetFiller(const FillerParameter& param) {
 169   const std::string& type = param.type();
 170   if (type == "constant") {
 171     return new ConstantFiller<Dtype>(param);
 172   } else if (type == "gaussian") {
 173     return new GaussianFiller<Dtype>(param);
 174   } else if (type == "positive_unitball") {
 175     return new PositiveUnitballFiller<Dtype>(param);
 176   } else if (type == "uniform") {
 177     return new UniformFiller<Dtype>(param);
 178   } else if (type == "xavier") {
 179     return new XavierFiller<Dtype>(param);
 180   } else {
 181     CHECK(false) << "Unknown filler name: " << param.type();
 182   }
 183   return (Filler<Dtype>*)(NULL);
 184 }
 185
 186 }  // namespace caffe
 187
 188 #endif  // CAFFE_FILLER_HPP