// Fillers are random number generators that fill a blob using the specified
// algorithm. The expectation is that they are only going to be used during
// initialization time and will not involve any GPUs.

#ifndef CAFFE_FILLER_HPP
#define CAFFE_FILLER_HPP

#include <string>

#include "caffe/blob.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/// @brief Fills a Blob with constant or randomly-generated data.
template <typename Dtype>
class Filler {
 public:
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  virtual ~Filler() {}
  virtual void Fill(Blob<Dtype>* blob) = 0;

 protected:
  FillerParameter filler_param_;
};  // class Filler
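
// A minimal usage sketch (the parameter values and the Blob shaping here are
// illustrative, not prescribed by this header): a filler is constructed from
// a FillerParameter and applied to an already-shaped Blob via Fill().
//
//   FillerParameter param;
//   param.set_type("gaussian");
//   param.set_std(0.01f);
//   shared_ptr<Filler<float> > filler(GetFiller<float>(param));
//   filler->Fill(&weights);  // weights: a Blob<float> reshaped beforehand
//
// GetFiller (defined at the end of this file) maps the type string to the
// matching Filler subclass.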

/// @brief Fills a Blob with constant values @f$ x = c @f$, where @f$ c @f$ is
///        the FillerParameter's value (0 by default).
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    for (int i = 0; i < count; ++i) {
      data[i] = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
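
// In a net definition, this filler is typically selected through a layer's
// weight_filler field (a sketch; the bounds are illustrative):
//
//   weight_filler {
//     type: "uniform"
//     min: -0.05
//     max: 0.05
//   }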

/// @brief Fills a Blob with Gaussian-distributed values
///        @f$ x \sim N(\mu, \sigma^2) @f$.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse >= 0) {
      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
      // These have num == channels == 1; width is the number of inputs; height
      // is the number of outputs. The 'sparse' variable specifies the mean
      // number of non-zero input weights for a given output.
      CHECK_GE(blob->num_axes(), 1);
      const int num_outputs = blob->shape(0);
      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
      for (int i = 0; i < blob->count(); ++i) {
        data[i] *= mask[i];
      }
    }
  }

 protected:
  shared_ptr<SyncedMemory> rand_vec_;
};
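
// A worked example of the sparse option (shape illustrative): for a square
// 1000 x 1000 weight blob with sparse: 15, each entry survives the Bernoulli
// mask with probability 15 / 1000 = 0.015, so each output row keeps about
// 1000 * 0.015 = 15 non-zero input weights on average.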

/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 */
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    DCHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation.
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
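
// For example (numbers illustrative): a row of raw uniform samples
// (1.0, 1.0, 0.5) sums to 2.5 and is rescaled to (0.4, 0.4, 0.2), so every
// row of the blob ends up on the positive unit simplex.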

/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
 *        set inversely proportional to the number of incoming nodes, outgoing
 *        nodes, or their average.
 *
 * A Filler based on the paper [Glorot and Bengio 2010]: Understanding
 * the difficulty of training deep feedforward neural networks.
 *
 * It fills the incoming matrix by randomly sampling uniform data from
 * [-scale, scale] where scale = sqrt(3 / n) and n is the fan_in, fan_out, or
 * their average, depending on the variance_norm option. You should make sure
 * the input blob has shape (num, a, b, c) where a * b * c = fan_in and
 * num * b * c = fan_out. Note that this is currently not the case for inner
 * product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype scale = sqrt(Dtype(3) / n);
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
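
// A worked example of the scale (shape illustrative): for convolution weights
// of shape (64, 32, 3, 3), fan_in = 32 * 3 * 3 = 288 and
// fan_out = 64 * 3 * 3 = 576. With the default FAN_IN norm,
// scale = sqrt(3 / 288) ~= 0.102; with AVERAGE, n = 432 and
// scale = sqrt(3 / 432) ~= 0.083.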

/**
 * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
 *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
 *        incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
 * accounts for ReLU nonlinearities.
 *
 * Aside: for another perspective on the scaling factor, see the derivation of
 * [Saxe, McClelland, and Ganguli 2013 (v3)].
 *
 * It fills the incoming matrix by randomly sampling Gaussian data with
 * std = sqrt(2 / n) where n is the fan_in, fan_out, or their average,
 * depending on the variance_norm option. You should make sure the input blob
 * has shape (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out.
 * Note that this is currently not the case for inner product layers.
 */
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
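
// For the same illustrative (64, 32, 3, 3) shape as above, fan_in = 288 and
// the default FAN_IN norm gives std = sqrt(2 / 288) ~= 0.083, i.e. weights
// drawn from N(0, 0.083^2).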

/*!
@brief Fills a Blob with coefficients for bilinear interpolation.

A common use case is with the DeconvolutionLayer acting as upsampling.
You can upsample a feature map with shape of (B, C, H, W) by any integer factor
using the following proto.
\code
layer {
  name: "upsample", type: "Deconvolution"
  bottom: "{{bottom_name}}" top: "{{top_name}}"
  convolution_param {
    kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
    num_output: {{C}} group: {{C}}
    pad: {{ceil((factor - 1) / 2.)}}
    weight_filler: { type: "bilinear" } bias_term: false
  }
  param { lr_mult: 0 decay_mult: 0 }
}
\endcode
Please use this by replacing `{{}}` with your values. By specifying
`num_output: {{C}} group: {{C}}`, it behaves as channel-wise convolution.
The filter shape of this deconvolution layer will be (C, 1, K, K) where K is
`kernel_size`, and this filler will set a (K, K) interpolation kernel for
every channel of the filter identically. The resulting shape of the top
feature map will be (B, C, factor * H, factor * W). Note that the learning
rate and the weight decay are set to 0 in order to keep coefficient values of
bilinear interpolation unchanged during training. If you apply this to an
image, this operation is equivalent to the following call in Python with
scikit-image.
\code{.py}
out = skimage.transform.rescale(img, factor, mode='constant', cval=0)
\endcode
 */
template <typename Dtype>
class BilinearFiller : public Filler<Dtype> {
 public:
  explicit BilinearFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
    CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
    Dtype* data = blob->mutable_cpu_data();
    int f = ceil(blob->width() / 2.);
    float c = (2 * f - 1 - f % 2) / (2. * f);
    for (int i = 0; i < blob->count(); ++i) {
      float x = i % blob->width();
      float y = (i / blob->width()) % blob->height();
      data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
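
// A worked example (factor illustrative): for factor = 2 the proto above
// gives kernel_size K = 2 * 2 - 2 % 2 = 4, so f = ceil(4 / 2.) = 2 and
// c = (2 * 2 - 1 - 2 % 2) / (2. * 2) = 0.75. The per-axis weights at
// x = 0..3 are 1 - |x / 2 - 0.75| = (0.25, 0.75, 0.75, 0.25), and each
// (4, 4) filter is the outer product of that vector with itself.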

/**
 * @brief Get a specific filler from the specification given in FillerParameter.
 *
 * Ideally this would be replaced by a factory pattern, but we will leave it
 * this way for now.
 */
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  } else if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  } else if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  } else if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  } else if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  } else if (type == "msra") {
    return new MSRAFiller<Dtype>(param);
  } else if (type == "bilinear") {
    return new BilinearFiller<Dtype>(param);
  } else {
    CHECK(false) << "Unknown filler name: " << param.type();
  }
  return (Filler<Dtype>*)(NULL);
}
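
// In practice, these type strings are usually selected from a net prototxt
// rather than called directly (a sketch; layer names and values illustrative):
//
//   layer {
//     name: "conv1" type: "Convolution" bottom: "data" top: "conv1"
//     convolution_param {
//       num_output: 64 kernel_size: 3
//       weight_filler { type: "msra" }
//       bias_filler { type: "constant" value: 0 }
//     }
//   }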

}  // namespace caffe

#endif  // CAFFE_FILLER_HPP