#include <string>
#include "caffe/blob.hpp"
-#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
};
/**
- * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$
- * is set inversely proportional to the number of incoming nodes.
+ * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where
+ * @f$ a = \sqrt{3 / n} @f$ and @f$ n @f$ is the number of incoming nodes,
+ * outgoing nodes, or their average.
*
* A Filler based on the paper [Bengio and Glorot 2010]: Understanding
- * the difficulty of training deep feedforward neuralnetworks, but does not
- * use the fan_out value.
+ * the difficulty of training deep feedforward neural networks.
*
- * It fills the incoming matrix by randomly sampling uniform data from
- * [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
- * of input nodes. You should make sure the input blob has shape (num, a, b, c)
- * where a * b * c = fan_in.
+ * It fills the incoming matrix by randomly sampling uniform data from [-scale,
+ * scale] where scale = sqrt(3 / n) where n is the fan_in, fan_out, or their
+ * average, depending on the variance_norm option. You should make sure the
+ * input blob has shape (num, a, b, c) where a * b * c = fan_in and num * b * c
+ * = fan_out. Note that this is currently not the case for inner product layers.
*
* TODO(dox): make notation in above comment consistent with rest & use LaTeX.
*/
virtual void Fill(Blob<Dtype>* blob) {  // overwrites blob's CPU data with uniform samples
CHECK(blob->count());  // the check fails when the blob has zero elements
int fan_in = blob->count() / blob->num();  // element count divided by the num dimension
- Dtype scale = sqrt(Dtype(3) / fan_in);
+ int fan_out = blob->count() / blob->channels();  // element count divided by the channels dimension
+ Dtype n = fan_in; // default to fan_in; kept unless variance_norm is AVERAGE or FAN_OUT
+ if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_AVERAGE) {
+ n = (fan_in + fan_out) / Dtype(2);  // mean of the two fans; the division is done in Dtype
+ } else if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_FAN_OUT) {
+ n = fan_out;
+ }
+ Dtype scale = sqrt(Dtype(3) / n);  // half-width of the sampling interval
caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
blob->mutable_cpu_data());  // one sample per element, bounds -scale and +scale
CHECK_EQ(this->filler_param_.sparse(), -1)  // requires sparse() == -1; runs after the data is written
}
};
+/**
+ * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
+ * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming
+ * nodes, outgoing nodes, or their average.
+ *
+ * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
+ * accounts for ReLU nonlinearities.
+ *
+ * Aside: for another perspective on the scaling factor, see the derivation of
+ * [Saxe, McClelland, and Ganguli 2013 (v3)].
+ *
+ * It fills the incoming matrix by randomly sampling Gaussian data with std =
+ * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on
+ * the variance_norm option. You should make sure the input blob has shape (num,
+ * a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that this
+ * is currently not the case for inner product layers.
+ *
+ * Fill() computes fan_in as count() / num() and fan_out as
+ * count() / channels(). It checks that the blob is non-empty before sampling
+ * and that the filler parameter's sparse() value is -1 after sampling.
+ */
+template <typename Dtype>
+class MSRAFiller : public Filler<Dtype> {
+ public:
+ explicit MSRAFiller(const FillerParameter& param)
+ : Filler<Dtype>(param) {}  // the parameter is handed to the Filler base unchanged
+ virtual void Fill(Blob<Dtype>* blob) {  // overwrites blob's CPU data with Gaussian samples
+ CHECK(blob->count());  // the check fails when the blob has zero elements
+ int fan_in = blob->count() / blob->num();  // element count divided by the num dimension
+ int fan_out = blob->count() / blob->channels();  // element count divided by the channels dimension
+ Dtype n = fan_in; // default to fan_in; kept unless variance_norm is AVERAGE or FAN_OUT
+ if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_AVERAGE) {
+ n = (fan_in + fan_out) / Dtype(2);  // mean of the two fans; the division is done in Dtype
+ } else if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_FAN_OUT) {
+ n = fan_out;
+ }
+ Dtype std = sqrt(Dtype(2) / n);  // standard deviation; note this local is named like namespace std
+ caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
+ blob->mutable_cpu_data());  // one sample per element, mean 0 and deviation std
+ CHECK_EQ(this->filler_param_.sparse(), -1)  // runs after the data has been written
+ << "Sparsity not supported by this Filler.";
+ }
+};
+
+/*!
+@brief Fills a Blob with coefficients for bilinear interpolation.
+
+A common use case is with the DeconvolutionLayer acting as upsampling.
+You can upsample a feature map with shape of (B, C, H, W) by any integer factor
+using the following proto.
+\code
+layer {
+ name: "upsample", type: "Deconvolution"
+ bottom: "{{bottom_name}}" top: "{{top_name}}"
+ convolution_param {
+ kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
+ num_output: {{C}} group: {{C}}
+ pad: {{ceil((factor - 1) / 2.)}}
+ weight_filler: { type: "bilinear" } bias_term: false
+ }
+ param { lr_mult: 0 decay_mult: 0 }
+}
+\endcode
+Please use this by replacing `{{}}` with your values. By specifying
+`num_output: {{C}} group: {{C}}`, it behaves as
+channel-wise convolution. The filter shape of this deconvolution layer will be
+(C, 1, K, K) where K is `kernel_size`, and this filler will set a (K, K)
+interpolation kernel for every channel of the filter identically. The resulting
+shape of the top feature map will be (B, C, factor * H, factor * W).
+Note that the learning rate and the
+weight decay are set to 0 in order to keep coefficient values of bilinear
+interpolation unchanged during training. If you apply this to an image, this
+operation is equivalent to the following call in Python with scikit-image.
+\code{.py}
+out = skimage.transform.rescale(img, factor, mode='constant', cval=0)
+\endcode
+
+Fill() requires a blob with exactly 4 axes whose width equals its height, and
+a filler parameter whose sparse() value is -1. The coefficients are computed
+in single-precision float locals regardless of Dtype.
+ */
+template <typename Dtype>
+class BilinearFiller : public Filler<Dtype> {
+ public:
+ explicit BilinearFiller(const FillerParameter& param)
+ : Filler<Dtype>(param) {}  // the parameter is handed to the Filler base unchanged
+ virtual void Fill(Blob<Dtype>* blob) {  // overwrites blob's CPU data with the kernel
+ CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
+ CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
+ Dtype* data = blob->mutable_cpu_data();
+ int f = ceil(blob->width() / 2.);  // equals factor when width is 2 * factor - factor % 2
+ float c = (2 * f - 1 - f % 2) / (2. * f);  // kernel center, expressed in units of f
+ for (int i = 0; i < blob->count(); ++i) {  // i is the flat index over all elements
+ float x = i % blob->width();  // column within the current (height x width) slice
+ float y = (i / blob->width()) % blob->height();  // row within the current slice
+ data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));  // product of two 1-D triangle weights
+ }
+ CHECK_EQ(this->filler_param_.sparse(), -1)  // runs after the data has been written
+ << "Sparsity not supported by this Filler.";
+ }
+};
/**
* @brief Get a specific filler from the specification given in FillerParameter.
return new UniformFiller<Dtype>(param);
} else if (type == "xavier") {
return new XavierFiller<Dtype>(param);
+ } else if (type == "msra") {
+ return new MSRAFiller<Dtype>(param);
+ } else if (type == "bilinear") {
+ return new BilinearFiller<Dtype>(param);
} else {
CHECK(false) << "Unknown filler name: " << param.type();
}