// Fillers are random number generators that fill a blob using the specified
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.

#ifndef CAFFE_FILLER_HPP_
#define CAFFE_FILLER_HPP_
#include <string>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/// @brief Fills a Blob with constant or randomly-generated data.
template <typename Dtype>
class Filler {
 public:
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  virtual ~Filler() {}
  virtual void Fill(Blob<Dtype>* blob) = 0;

 protected:
  FillerParameter filler_param_;
};  // class Filler

/// @brief Fills a Blob with constant values @f$ x = 0 @f$.
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    for (int i = 0; i < count; ++i) {
      data[i] = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with Gaussian-distributed values
///        @f$ x \sim N(\mu, \sigma^2) @f$.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse >= 0) {
      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
      // These have num == channels == 1; width is the number of inputs; height
      // is the number of outputs. The 'sparse' variable specifies the mean
      // number of non-zero input weights for a given output.
      CHECK_EQ(blob->num(), 1);
      CHECK_EQ(blob->channels(), 1);
      int num_outputs = blob->height();
      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
      // Multiply each Gaussian sample by its 0/1 Bernoulli mask entry,
      // zeroing out the weights whose mask is 0.
      for (int i = 0; i < blob->count(); ++i) {
        data[i] *= mask[i];
      }
    }
  }

 protected:
  shared_ptr<SyncedMemory> rand_vec_;
};

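// Illustrative sketch of the sparse Gaussian mode above (an example, not part
// of the API; 'weights' stands for a hypothetical 1 x 1 x num_outputs x
// num_inputs weight blob):
//
//   FillerParameter param;
//   param.set_type("gaussian");
//   param.set_std(0.01);
//   param.set_sparse(10);  // aim for ~10 non-zero input weights per output
//   GaussianFiller<float> filler(param);
//   filler.Fill(weights);
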
/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 */
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    DCHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation.
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

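// Worked example (illustrative): with num = 1 and dim = 3, a raw uniform draw
// of (0.1, 0.2, 0.3) has sum 0.6 and normalizes to (1/6, 1/3, 1/2), which is
// non-negative and sums to 1 as required.
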
/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$, where @f$ a @f$ is
 *        chosen so that the variance of @f$ x @f$ is inversely proportional to
 *        the number of incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
 * the difficulty of training deep feedforward neural networks.
 *
 * It fills the incoming matrix by randomly sampling uniform data from
 * [-scale, scale] where scale = sqrt(3 / n) and n is the fan_in, fan_out, or
 * their average, depending on the variance_norm option. You should make sure
 * the input blob has shape (num, a, b, c) where a * b * c = fan_in and
 * num * b * c = fan_out. Note that this is currently not the case for inner
 * product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype scale = sqrt(Dtype(3) / n);
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

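// Worked example (illustrative): a 256 x 128 x 3 x 3 convolution weight blob
// has count = 294912, so fan_in = count / num = 1152 and
// fan_out = count / channels = 2304. With the default FAN_IN option,
// scale = sqrt(3 / 1152) ~= 0.051, i.e. weights are drawn from
// U(-0.051, +0.051).
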
/**
 * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
 *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
 *        incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
 * accounts for ReLU nonlinearities.
 *
 * Aside: for another perspective on the scaling factor, see the derivation of
 * [Saxe, McClelland, and Ganguli 2013 (v3)].
 *
 * It fills the incoming matrix by randomly sampling Gaussian data with
 * std = sqrt(2 / n) where n is the fan_in, fan_out, or their average,
 * depending on the variance_norm option. You should make sure the input blob
 * has shape (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out.
 * Note that this is currently not the case for inner product layers.
 */
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

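// Worked example (illustrative): for the same 256 x 128 x 3 x 3 blob as in
// the Xavier note above, FAN_IN gives n = 1152 and
// std = sqrt(2 / 1152) ~= 0.042, i.e. weights are drawn from N(0, 0.042^2).
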
/**
 * @brief Get a specific filler from the specification given in FillerParameter.
 *
 * Ideally this would be replaced by a factory pattern, but we will leave it
 * this way for now.
 */
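// Illustrative usage (a sketch; 'weights' is a hypothetical Blob<float>*):
//
//   FillerParameter param;
//   param.set_type("xavier");
//   shared_ptr<Filler<float> > filler(GetFiller<float>(param));
//   filler->Fill(weights);
//
// The caller owns the returned raw pointer; wrapping it in a shared_ptr, as
// above, is the usual pattern.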
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  } else if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  } else if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  } else if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  } else if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  } else if (type == "msra") {
    return new MSRAFiller<Dtype>(param);
  } else {
    CHECK(false) << "Unknown filler name: " << param.type();
  }
  return (Filler<Dtype>*)(NULL);
}

}  // namespace caffe

#endif  // CAFFE_FILLER_HPP_