// Fillers are random number generators that fill a blob using the specified
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.

#ifndef CAFFE_FILLER_HPP_
#define CAFFE_FILLER_HPP_
#include <string>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/// @brief Fills a Blob with constant or randomly-generated data.
template <typename Dtype>
class Filler {
 public:
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  virtual ~Filler() {}
  virtual void Fill(Blob<Dtype>* blob) = 0;

 protected:
  FillerParameter filler_param_;
};  // class Filler

/// @brief Fills a Blob with constant values @f$ x = 0 @f$.
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    for (int i = 0; i < count; ++i) {
      data[i] = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with Gaussian-distributed values
///        @f$ x \sim N(\mu, \sigma^2) @f$.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse >= 0) {
      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
      // These have num == channels == 1; width is the number of inputs; height
      // is the number of outputs. The 'sparse' variable specifies the mean
      // number of non-zero input weights for a given output.
      CHECK_EQ(blob->num(), 1);
      CHECK_EQ(blob->channels(), 1);
      int num_outputs = blob->height();
      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
      // Multiply each Gaussian sample by its 0/1 Bernoulli mask entry,
      // zeroing out the weights whose mask is 0.
      for (int i = 0; i < blob->count(); ++i) {
        data[i] *= mask[i];
      }
    }
  }

 protected:
  shared_ptr<SyncedMemory> rand_vec_;
};

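// Illustrative sketch of the sparse Gaussian mode above (an example, not part
// of the API; 'weights' stands for a hypothetical 1 x 1 x num_outputs x
// num_inputs weight blob):
//
//   FillerParameter param;
//   param.set_type("gaussian");
//   param.set_std(0.01);
//   param.set_sparse(10);  // aim for ~10 non-zero input weights per output
//   GaussianFiller<float> filler(param);
//   filler.Fill(weights);
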
/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 */
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    DCHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation.
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

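// Worked example (illustrative): with num = 1 and dim = 3, a raw uniform draw
// of (0.1, 0.2, 0.3) has sum 0.6 and normalizes to (1/6, 1/3, 1/2), which is
// non-negative and sums to 1 as required.
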
/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$, where @f$ a @f$ is
 *        chosen so that the variance of @f$ x @f$ is inversely proportional to
 *        the number of incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
 * the difficulty of training deep feedforward neural networks.
 *
 * It fills the incoming matrix by randomly sampling uniform data from
 * [-scale, scale] where scale = sqrt(3 / n) and n is the fan_in, fan_out, or
 * their average, depending on the variance_norm option. You should make sure
 * the input blob has shape (num, a, b, c) where a * b * c = fan_in and
 * num * b * c = fan_out. Note that this is currently not the case for inner
 * product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype scale = sqrt(Dtype(3) / n);
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

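// Worked example (illustrative): a 256 x 128 x 3 x 3 convolution weight blob
// has count = 294912, so fan_in = count / num = 1152 and
// fan_out = count / channels = 2304. With the default FAN_IN option,
// scale = sqrt(3 / 1152) ~= 0.051, i.e. weights are drawn from
// U(-0.051, +0.051).
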
/**
 * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
 *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
 *        incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
 * accounts for ReLU nonlinearities.
 *
 * Aside: for another perspective on the scaling factor, see the derivation of
 * [Saxe, McClelland, and Ganguli 2013 (v3)].
 *
 * It fills the incoming matrix by randomly sampling Gaussian data with
 * std = sqrt(2 / n) where n is the fan_in, fan_out, or their average,
 * depending on the variance_norm option. You should make sure the input blob
 * has shape (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out.
 * Note that this is currently not the case for inner product layers.
 */
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

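// Worked example (illustrative): for the same 256 x 128 x 3 x 3 blob as in
// the Xavier note above, FAN_IN gives n = 1152 and
// std = sqrt(2 / 1152) ~= 0.042, i.e. weights are drawn from N(0, 0.042^2).
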
/**
 * @brief Get a specific filler from the specification given in FillerParameter.
 *
 * Ideally this would be replaced by a factory pattern, but we will leave it
 * this way for now.
 */
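// Illustrative usage (a sketch; 'weights' is a hypothetical Blob<float>*):
//
//   FillerParameter param;
//   param.set_type("xavier");
//   shared_ptr<Filler<float> > filler(GetFiller<float>(param));
//   filler->Fill(weights);
//
// The caller owns the returned raw pointer; wrapping it in a shared_ptr, as
// above, is the usual pattern.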
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  } else if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  } else if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  } else if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  } else if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  } else if (type == "msra") {
    return new MSRAFiller<Dtype>(param);
  } else {
    CHECK(false) << "Unknown filler name: " << param.type();
  }
  return (Filler<Dtype>*)(NULL);
}

}  // namespace caffe

#endif  // CAFFE_FILLER_HPP_