// Fillers are random number generators that fill a blob using the specified
// algorithm. The expectation is that they are only used at initialization
// time and will not involve any GPUs.

#ifndef CAFFE_FILLER_HPP
#define CAFFE_FILLER_HPP

#include <string>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/// @brief Fills a Blob with constant or randomly-generated data.
template <typename Dtype>
class Filler {
 public:
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}
  virtual ~Filler() {}
  virtual void Fill(Blob<Dtype>* blob) = 0;
 protected:
  FillerParameter filler_param_;
};  // class Filler

/// @brief Fills a Blob with constant values @f$ x = a @f$, where @f$ a @f$ is
///        the `value` filler parameter (0 by default).
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    for (int i = 0; i < count; ++i) {
      data[i] = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with uniformly distributed values @f$ x \sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/// @brief Fills a Blob with Gaussian-distributed values
///        @f$ x \sim N(\mu, \sigma^2) @f$.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse >= 0) {
      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
      // These have num == channels == 1; width is the number of inputs; height
      // is the number of outputs. The 'sparse' variable specifies the mean
      // number of non-zero input weights for a given output.
      CHECK_GE(blob->num_axes(), 1);
      const int num_outputs = blob->shape(0);
      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
      for (int i = 0; i < blob->count(); ++i) {
        data[i] *= mask[i];
      }
    }
  }

 protected:
  shared_ptr<SyncedMemory> rand_vec_;
};
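
// Illustrative usage (hypothetical values): e.g.
//   weight_filler { type: "gaussian" std: 0.01 sparse: 15 }
// draws each weight from N(0, 0.01^2), then keeps it with probability
// 15 / shape(0) via the Bernoulli mask above, zeroing the rest.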

/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
 *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
 */
template <typename Dtype>
class PositiveUnitballFiller : public Filler<Dtype> {
 public:
  explicit PositiveUnitballFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    DCHECK(blob->count());
    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
    // We expect the filler to not be called very frequently, so we will
    // just use a simple implementation.
    int dim = blob->count() / blob->num();
    CHECK(dim);
    for (int i = 0; i < blob->num(); ++i) {
      Dtype sum = 0;
      for (int j = 0; j < dim; ++j) {
        sum += data[i * dim + j];
      }
      for (int j = 0; j < dim; ++j) {
        data[i * dim + j] /= sum;
      }
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
 *        set inversely proportional to the number of incoming nodes, outgoing
 *        nodes, or their average.
 *
 * A Filler based on the paper [Glorot and Bengio 2010]: Understanding the
 * difficulty of training deep feedforward neural networks.
 *
 * It fills the incoming matrix by uniformly sampling from [-scale, scale],
 * where scale = sqrt(3 / n) and n is the fan_in, fan_out, or their average,
 * depending on the variance_norm option. You should make sure the input blob
 * has shape (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out.
 * Note that this is currently not the case for inner product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype scale = sqrt(Dtype(3) / n);
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/**
 * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
 *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
 *        incoming nodes, outgoing nodes, or their average.
 *
 * A Filler based on the paper [He, Zhang, Ren and Sun 2015], which
 * specifically accounts for ReLU nonlinearities.
 *
 * Aside: for another perspective on the scaling factor, see the derivation of
 * [Saxe, McClelland, and Ganguli 2013 (v3)].
 *
 * It fills the incoming matrix by sampling Gaussian data with std =
 * sqrt(2 / n), where n is the fan_in, fan_out, or their average, depending on
 * the variance_norm option. You should make sure the input blob has shape
 * (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note
 * that this is currently not the case for inner product layers.
 */
template <typename Dtype>
class MSRAFiller : public Filler<Dtype> {
 public:
  explicit MSRAFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());
    int fan_in = blob->count() / blob->num();
    int fan_out = blob->count() / blob->channels();
    Dtype n = fan_in;  // default to fan_in
    if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_AVERAGE) {
      n = (fan_in + fan_out) / Dtype(2);
    } else if (this->filler_param_.variance_norm() ==
        FillerParameter_VarianceNorm_FAN_OUT) {
      n = fan_out;
    }
    Dtype std = sqrt(Dtype(2) / n);
    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
        blob->mutable_cpu_data());
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/**
@brief Fills a Blob with coefficients for bilinear interpolation.

A common use case is with the DeconvolutionLayer acting as upsampling.
You can upsample a feature map with shape of (B, C, H, W) by any integer factor
using the following proto.
\code
layer {
  name: "upsample", type: "Deconvolution"
  bottom: "{{bottom_name}}" top: "{{top_name}}"
  convolution_param {
    kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
    num_output: {{C}} group: {{C}}
    pad: {{ceil((factor - 1) / 2.)}}
    weight_filler: { type: "bilinear" } bias_term: false
  }
  param { lr_mult: 0 decay_mult: 0 }
}
\endcode
Please use this by replacing `{{}}` with your values. By specifying
`num_output: {{C}} group: {{C}}`, it behaves as channel-wise convolution.
The filter shape of this deconvolution layer will be (C, 1, K, K) where K is
`kernel_size`, and this filler will set a (K, K) interpolation kernel for
every channel of the filter identically. The resulting shape of the top
feature map will be (B, C, factor * H, factor * W). Note that the learning
rate and the weight decay are set to 0 in order to keep the coefficient
values of bilinear interpolation unchanged during training. If you apply this
to an image, this operation is equivalent to the following call in Python
with scikit-image.
\code{.py}
out = skimage.transform.rescale(img, factor, mode='constant', cval=0)
\endcode
 */
template <typename Dtype>
class BilinearFiller : public Filler<Dtype> {
 public:
  explicit BilinearFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
    CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
    Dtype* data = blob->mutable_cpu_data();
    int f = ceil(blob->width() / 2.);
    float c = (2 * f - 1 - f % 2) / (2. * f);
    for (int i = 0; i < blob->count(); ++i) {
      float x = i % blob->width();
      float y = (i / blob->width()) % blob->height();
      data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};

/**
 * @brief Get a specific filler from the specification given in
 *        FillerParameter.
 *
 * Ideally this would be replaced by a factory pattern, but we will leave it
 * this way for now.
 */
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& type = param.type();
  if (type == "constant") {
    return new ConstantFiller<Dtype>(param);
  } else if (type == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  } else if (type == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  } else if (type == "uniform") {
    return new UniformFiller<Dtype>(param);
  } else if (type == "xavier") {
    return new XavierFiller<Dtype>(param);
  } else if (type == "msra") {
    return new MSRAFiller<Dtype>(param);
  } else if (type == "bilinear") {
    return new BilinearFiller<Dtype>(param);
  } else {
    CHECK(false) << "Unknown filler name: " << param.type();
  }
  return (Filler<Dtype>*)(NULL);
}
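
// A typical call site looks something like the sketch below (blob and
// parameter names are illustrative):
//   FillerParameter filler_param;
//   filler_param.set_type("xavier");
//   shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(filler_param));
//   filler->Fill(&weight_blob);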

}  // namespace caffe

#endif  // CAFFE_FILLER_HPP