diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp
index d0b5baa..dad9ad4 100644
--- a/include/caffe/filler.hpp
+++ b/include/caffe/filler.hpp
@@ -1,5 +1,3 @@
-// Copyright 2013 Yangqing Jia
-
 // Fillers are random number generators that fill a blob using the specified
 // algorithm. The expectation is that they are only going to be used during
 // initialization time and will not involve any GPUs.
@@ -7,17 +5,16 @@
 #ifndef CAFFE_FILLER_HPP
 #define CAFFE_FILLER_HPP
 
-//#include <mkl.h>
 #include <string>
 
-#include "caffe/common.hpp"
 #include "caffe/blob.hpp"
+#include "caffe/proto/caffe.pb.h"
 #include "caffe/syncedmem.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/proto/caffe.pb.h"
 
 namespace caffe {
 
+/// @brief Fills a Blob with constant or randomly-generated data.
 template <typename Dtype>
 class Filler {
  public:
@@ -29,6 +26,7 @@ class Filler {
 };  // class Filler
 
+/// @brief Fills a Blob with constant values @f$ x = c @f$ (the value parameter, default 0).
 template <typename Dtype>
 class ConstantFiller : public Filler<Dtype> {
  public:
@@ -42,9 +40,12 @@ class ConstantFiller : public Filler<Dtype> {
     for (int i = 0; i < count; ++i) {
       data[i] = value;
     }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/// @brief Fills a Blob with uniformly distributed values @f$ x \sim U(a, b) @f$.
 template <typename Dtype>
 class UniformFiller : public Filler<Dtype> {
  public:
@@ -52,12 +53,14 @@ class UniformFiller : public Filler<Dtype> {
       : Filler<Dtype>(param) {}
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        Dtype(this->filler_param_.min()),
-        Dtype(this->filler_param_.max()));
+    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
+        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/// @brief Fills a Blob with Gaussian-distributed values @f$ x \sim N(\mu, \sigma^2) @f$.
 template <typename Dtype>
 class GaussianFiller : public Filler<Dtype> {
  public:
@@ -66,12 +69,34 @@ class GaussianFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     Dtype* data = blob->mutable_cpu_data();
     CHECK(blob->count());
-    caffe_vRngGaussian<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        Dtype(this->filler_param_.mean()),
-        Dtype(this->filler_param_.std()));
+    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
+        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
+    int sparse = this->filler_param_.sparse();
+    CHECK_GE(sparse, -1);
+    if (sparse >= 0) {
+      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
+      // The first axis (shape(0)) indexes the outputs. The 'sparse' variable
+      // specifies the mean number of non-zero input weights for a given output.
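+      // Concretely, each weight below is kept with probability
+      // sparse / shape(0): e.g. sparse == 10 with shape(0) == 100 outputs
+      // keeps each weight with probability 0.1 and zeroes out the rest.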
+      CHECK_GE(blob->num_axes(), 1);
+      const int num_outputs = blob->shape(0);
+      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
+      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
+      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
+      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
+      for (int i = 0; i < blob->count(); ++i) {
+        data[i] *= mask[i];
+      }
+    }
   }
+
+ protected:
+  shared_ptr<SyncedMemory> rand_vec_;
 };
 
+/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
+ *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
+ */
 template <typename Dtype>
 class PositiveUnitballFiller : public Filler<Dtype> {
  public:
@@ -80,7 +105,7 @@ class PositiveUnitballFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     Dtype* data = blob->mutable_cpu_data();
     DCHECK(blob->count());
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(), 0, 1);
+    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
     // We expect the filler to not be called very frequently, so we will
     // just use a simple implementation
     int dim = blob->count() / blob->num();
@@ -94,17 +119,27 @@ class PositiveUnitballFiller : public Filler<Dtype> {
       data[i * dim + j] /= sum;
     }
   }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
-// A filler based on the paper [Bengio and Glorot 2010]: Understanding
-// the difficulty of training deep feedforward neuralnetworks, but does not
-// use the fan_out value.
-//
-// It fills the incoming matrix by randomly sampling uniform data from
-// [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes. You should make sure the input blob has shape (num, a, b, c)
-// where a * b * c = fan_in.
+/**
+ * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
+ *        set inversely proportional to the number of incoming nodes, outgoing
+ *        nodes, or their average.
+ *
+ * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
+ * the difficulty of training deep feedforward neural networks.
+ *
+ * It fills the incoming matrix by randomly sampling uniform data from
+ * [-scale, scale], where scale = sqrt(3 / n) and n is the fan_in, fan_out, or
+ * their average, depending on the variance_norm option. You should make sure
+ * the input blob has shape (num, a, b, c) where a * b * c = fan_in and
+ * num * b * c = fan_out. Note that this is currently not the case for inner
+ * product layers.
+ *
+ * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
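+ *
+ * For example, a convolution weight blob of shape (256, 128, 3, 3) gives
+ * fan_in = 128 * 3 * 3 = 1152 and fan_out = 256 * 3 * 3 = 2304, so the
+ * default FAN_IN setting yields scale = sqrt(3 / 1152), roughly 0.051.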
+ */
 template <typename Dtype>
 class XavierFiller : public Filler<Dtype> {
  public:
@@ -113,16 +148,125 @@ class XavierFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
     int fan_in = blob->count() / blob->num();
-    Dtype scale = sqrt(Dtype(3) / fan_in);
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        -scale, scale);
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype scale = sqrt(Dtype(3) / n);
+    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
+        blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/**
+ * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
+ *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
+ *        incoming nodes, outgoing nodes, or their average.
+ *
+ * A Filler based on the paper [He, Zhang, Ren and Sun 2015], which
+ * specifically accounts for ReLU nonlinearities.
+ *
+ * Aside: for another perspective on the scaling factor, see the derivation of
+ * [Saxe, McClelland, and Ganguli 2013 (v3)].
+ *
+ * It fills the incoming matrix by randomly sampling Gaussian data with std =
+ * sqrt(2 / n), where n is the fan_in, fan_out, or their average, depending on
+ * the variance_norm option. You should make sure the input blob has shape
+ * (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that
+ * this is currently not the case for inner product layers.
+ */
+template <typename Dtype>
+class MSRAFiller : public Filler<Dtype> {
+ public:
+  explicit MSRAFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    CHECK(blob->count());
+    int fan_in = blob->count() / blob->num();
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype std = sqrt(Dtype(2) / n);
+    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
+        blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
+  }
+};
+
+/*!
+@brief Fills a Blob with coefficients for bilinear interpolation.
+
+A common use case is with the DeconvolutionLayer acting as upsampling.
+You can upsample a feature map with shape (B, C, H, W) by any integer factor
+using the following proto.
+\code
+layer {
+  name: "upsample", type: "Deconvolution"
+  bottom: "{{bottom_name}}" top: "{{top_name}}"
+  convolution_param {
+    kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
+    num_output: {{C}} group: {{C}}
+    pad: {{ceil((factor - 1) / 2.)}}
+    weight_filler: { type: "bilinear" } bias_term: false
+  }
+  param { lr_mult: 0 decay_mult: 0 }
+}
+\endcode
+Replace the `{{}}` placeholders with your values. By specifying
+`num_output: {{C}} group: {{C}}`, the deconvolution behaves as a channel-wise
+convolution: the filter shape will be (C, 1, K, K) where K is `kernel_size`,
+and this filler sets the same (K, K) interpolation kernel for every channel of
+the filter. The resulting shape of the top feature map will be
+(B, C, factor * H, factor * W).
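+For example, factor = 2 gives kernel_size: 4, stride: 2, pad: 1; the resulting
+(4, 4) kernel is the outer product of (0.25, 0.75, 0.75, 0.25) with itself.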
+Note that the learning rate and the weight decay are set to 0 in order to keep
+the bilinear interpolation coefficients unchanged during training. If you apply
+this to an image, this operation is equivalent to the following call in Python
+with scikit-image.
+\code{.py}
+out = skimage.transform.rescale(img, factor, mode='constant', cval=0)
+\endcode
+ */
+template <typename Dtype>
+class BilinearFiller : public Filler<Dtype> {
+ public:
+  explicit BilinearFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
+    CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
+    Dtype* data = blob->mutable_cpu_data();
+    int f = ceil(blob->width() / 2.);
+    float c = (2 * f - 1 - f % 2) / (2. * f);
+    for (int i = 0; i < blob->count(); ++i) {
+      float x = i % blob->width();
+      float y = (i / blob->width()) % blob->height();
+      data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
+    }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
+  }
+};
 
-// A function to get a specific filler from the specification given in
-// FillerParameter. Ideally this would be replaced by a factory pattern,
-// but we will leave it this way for now.
+/**
+ * @brief Get a specific filler from the specification given in FillerParameter.
+ *
+ * Ideally this would be replaced by a factory pattern, but we will leave it
+ * this way for now.
+ */
 template <typename Dtype>
 Filler<Dtype>* GetFiller(const FillerParameter& param) {
   const std::string& type = param.type();
@@ -136,6 +280,10 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
     return new UniformFiller<Dtype>(param);
   } else if (type == "xavier") {
     return new XavierFiller<Dtype>(param);
+  } else if (type == "msra") {
+    return new MSRAFiller<Dtype>(param);
+  } else if (type == "bilinear") {
+    return new BilinearFiller<Dtype>(param);
   } else {
     CHECK(false) << "Unknown filler name: " << param.type();
   }
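
A filler is typically constructed through GetFiller and applied to a blob that
has already been shaped by its owning layer. The snippet below is a minimal
usage sketch, not part of the diff above; the InitWeights wrapper and the
"msra" choice are illustrative assumptions.

  #include <boost/shared_ptr.hpp>
  #include "caffe/filler.hpp"

  // Hypothetical helper: fill an already-shaped weight blob in place.
  void InitWeights(caffe::Blob<float>* weights) {
    caffe::FillerParameter param;
    param.set_type("msra");  // any registered type, e.g. "gaussian", "xavier"
    boost::shared_ptr<caffe::Filler<float> > filler(
        caffe::GetFiller<float>(param));
    filler->Fill(weights);  // runs on the CPU only
  }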