// Fillers are random number generators that fill a blob using the specified
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.

#ifndef CAFFE_FILLER_HPP
#define CAFFE_FILLER_HPP

#include <string>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/// @brief Fills a Blob with constant or randomly-generated data.
template <typename Dtype>
class Filler {
 public:
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  virtual ~Filler() {}
  virtual void Fill(Blob<Dtype>* blob) = 0;

 protected:
  FillerParameter filler_param_;
};  // class Filler
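
// A minimal sketch of extending the interface (the OnesFiller below is
// hypothetical, not part of Caffe): a new filler derives from Filler<Dtype>
// and implements Fill().
//
//   template <typename Dtype>
//   class OnesFiller : public Filler<Dtype> {
//    public:
//     explicit OnesFiller(const FillerParameter& param)
//         : Filler<Dtype>(param) {}
//     virtual void Fill(Blob<Dtype>* blob) {
//       Dtype* data = blob->mutable_cpu_data();
//       for (int i = 0; i < blob->count(); ++i) {
//         data[i] = Dtype(1);  // set every element to one
//       }
//     }
//   };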

/// @brief Fills a Blob with constant values @f$ x = v @f$, where @f$ v @f$ is
/// the filler parameter's value field (default 0).
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    for (int i = 0; i < count; ++i) {
      data[i] = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
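
// A minimal usage sketch (the blob shape here is illustrative):
//
//   FillerParameter param;
//   param.set_value(0.5);
//   ConstantFiller<float> filler(param);
//   Blob<float> bias(1, 1, 1, 64);
//   filler.Fill(&bias);  // every element of bias is now 0.5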

/// @brief Fills a Blob with uniformly distributed values
/// @f$ x \sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
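
// A minimal usage sketch with a = min and b = max (shape illustrative):
//
//   FillerParameter param;
//   param.set_min(-0.05);
//   param.set_max(0.05);
//   UniformFiller<float> filler(param);
//   Blob<float> weights(10, 20, 1, 1);
//   filler.Fill(&weights);  // each element drawn i.i.d. from U(-0.05, 0.05)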

/// @brief Fills a Blob with Gaussian-distributed values
/// @f$ x \sim N(\mu, \sigma^2) @f$.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse >= 0) {
      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
      // These have num == channels == 1; width is the number of inputs; height
      // is the number of outputs. The 'sparse' variable specifies the mean
      // number of non-zero input weights for a given output.
      CHECK_GE(blob->num_axes(), 1);
      const int num_outputs = blob->shape(0);
      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
      for (int i = 0; i < blob->count(); ++i) {
        // Zero out each weight independently with probability
        // 1 - non_zero_probability.
        data[i] *= mask[i];
      }
    }
  }

 protected:
  shared_ptr<SyncedMemory> rand_vec_;
};
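
// A minimal sketch of sparse Gaussian initialization (shape illustrative):
// with sparse = 5 and shape(0) = 100, each weight is kept with probability
// 5/100 and zeroed otherwise.
//
//   FillerParameter param;
//   param.set_std(0.01);
//   param.set_sparse(5);
//   GaussianFiller<float> filler(param);
//   Blob<float> weights(100, 500, 1, 1);
//   filler.Fill(&weights);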

/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 */
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    DCHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation.
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      // Normalize each row so its entries sum to one.
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
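
// A minimal usage sketch (shape illustrative): after filling, each of the
// 10 rows of 256 entries is non-negative and sums to one.
//
//   FillerParameter param;
//   PositiveUnitballFiller<float> filler(param);
//   Blob<float> weights(10, 256, 1, 1);
//   filler.Fill(&weights);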

/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
 *        set inversely proportional to the number of incoming nodes, outgoing
 *        nodes, or their average.
 *
 * A Filler based on the paper [Glorot and Bengio 2010]: Understanding
 * the difficulty of training deep feedforward neural networks.
 *
 * It fills the incoming matrix by uniformly sampling from [-scale, scale],
 * where scale = sqrt(3 / n) and n is the fan_in, fan_out, or their average,
 * depending on the variance_norm option. You should make sure the input blob
 * has shape (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out.
 * Note that this is currently not the case for inner product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype scale = sqrt(Dtype(3) / n);
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
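
// A worked sketch (shape illustrative): for a 64x32x3x3 convolution weight
// blob, fan_in = 32*3*3 = 288 and fan_out = 64*3*3 = 576, so the default
// FAN_IN normalization gives scale = sqrt(3/288) ~ 0.102.
//
//   FillerParameter param;  // variance_norm defaults to FAN_IN
//   XavierFiller<float> filler(param);
//   Blob<float> weights(64, 32, 3, 3);
//   filler.Fill(&weights);  // elements drawn from U(-0.102, +0.102)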

/**
 * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
 *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
 *        incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Delving Deep
 * into Rectifiers, which specifically accounts for ReLU nonlinearities.
 *
 * Aside: for another perspective on the scaling factor, see the derivation of
 * [Saxe, McClelland, and Ganguli 2013 (v3)].
 *
 * It fills the incoming matrix by sampling Gaussian data with std =
 * sqrt(2 / n), where n is the fan_in, fan_out, or their average, depending on
 * the variance_norm option. You should make sure the input blob has shape
 * (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that
 * this is currently not the case for inner product layers.
 */
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
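
// A worked sketch (shape illustrative): for the same 64x32x3x3 blob,
// FAN_IN normalization gives std = sqrt(2/288) ~ 0.083.
//
//   FillerParameter param;  // variance_norm defaults to FAN_IN
//   MSRAFiller<float> filler(param);
//   Blob<float> weights(64, 32, 3, 3);
//   filler.Fill(&weights);  // elements ~ N(0, 0.083^2)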

/**
 * @brief Get a specific filler from the specification given in FillerParameter.
 *
 * Ideally this would be replaced by a factory pattern, but we will leave it
 * this way for now.
 */
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  } else if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  } else if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  } else if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  } else if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  } else if (type == "msra") {
    return new MSRAFiller<Dtype>(param);
  } else {
    CHECK(false) << "Unknown filler name: " << param.type();
  }
  return (Filler<Dtype>*)(NULL);
}
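
// The caller owns the returned raw pointer; a minimal sketch wraps it in a
// shared_ptr (the pattern Caffe's layers use) so it is released automatically:
//
//   FillerParameter param;
//   param.set_type("xavier");
//   shared_ptr<Filler<float> > filler(GetFiller<float>(param));
//   Blob<float> weights(64, 32, 3, 3);
//   filler->Fill(&weights);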

}  // namespace caffe

#endif  // CAFFE_FILLER_HPP