diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp
index d0b5baa..dad9ad4 100644
--- a/include/caffe/filler.hpp
+++ b/include/caffe/filler.hpp
@@ -1,5 +1,3 @@
-// Copyright 2013 Yangqing Jia
-
 // Fillers are random number generators that fill a blob using the specified
 // algorithm. The expectation is that they are only going to be used during
 // initialization time and will not involve any GPUs.
@@ -7,17 +5,16 @@
 #ifndef CAFFE_FILLER_HPP
 #define CAFFE_FILLER_HPP
 
-//#include <mkl.h>
 #include <string>
 
-#include "caffe/common.hpp"
 #include "caffe/blob.hpp"
+#include "caffe/proto/caffe.pb.h"
 #include "caffe/syncedmem.hpp"
 #include "caffe/util/math_functions.hpp"
-#include "caffe/proto/caffe.pb.h"
 
 namespace caffe {
 
+/// @brief Fills a Blob with constant or randomly-generated data.
 template <typename Dtype>
 class Filler {
  public:
@@ -29,6 +26,7 @@ class Filler {
 };  // class Filler
 
+/// @brief Fills a Blob with constant values @f$ x = c @f$ (the value parameter, default 0).
 template <typename Dtype>
 class ConstantFiller : public Filler<Dtype> {
  public:
@@ -42,9 +40,12 @@ class ConstantFiller : public Filler<Dtype> {
     for (int i = 0; i < count; ++i) {
       data[i] = value;
     }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/// @brief Fills a Blob with uniformly distributed values @f$ x \sim U(a, b) @f$.
 template <typename Dtype>
 class UniformFiller : public Filler<Dtype> {
  public:
@@ -52,12 +53,14 @@ class UniformFiller : public Filler<Dtype> {
       : Filler<Dtype>(param) {}
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        Dtype(this->filler_param_.min()),
-        Dtype(this->filler_param_.max()));
+    caffe_rng_uniform<Dtype>(blob->count(), Dtype(this->filler_param_.min()),
+        Dtype(this->filler_param_.max()), blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/// @brief Fills a Blob with Gaussian-distributed values @f$ x \sim N(\mu, \sigma^2) @f$.
 template <typename Dtype>
 class GaussianFiller : public Filler<Dtype> {
  public:
@@ -66,12 +69,34 @@ class GaussianFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     Dtype* data = blob->mutable_cpu_data();
     CHECK(blob->count());
-    caffe_vRngGaussian<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        Dtype(this->filler_param_.mean()),
-        Dtype(this->filler_param_.std()));
+    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(this->filler_param_.mean()),
+        Dtype(this->filler_param_.std()), blob->mutable_cpu_data());
+    int sparse = this->filler_param_.sparse();
+    CHECK_GE(sparse, -1);
+    if (sparse >= 0) {
+      // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
+      // The first axis (shape(0)) indexes the outputs. The 'sparse' variable
+      // specifies the mean number of non-zero input weights for a given output.
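+      // Concretely, each weight below is kept with probability
+      // sparse / shape(0): e.g. sparse == 10 with shape(0) == 100 outputs
+      // keeps each weight with probability 0.1 and zeroes out the rest.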
+      CHECK_GE(blob->num_axes(), 1);
+      const int num_outputs = blob->shape(0);
+      Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
+      rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
+      int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
+      caffe_rng_bernoulli(blob->count(), non_zero_probability, mask);
+      for (int i = 0; i < blob->count(); ++i) {
+        data[i] *= mask[i];
+      }
+    }
   }
+
+ protected:
+  shared_ptr<SyncedMemory> rand_vec_;
 };
 
+/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$
+ *         such that @f$ \forall i \sum_j x_{ij} = 1 @f$.
+ */
 template <typename Dtype>
 class PositiveUnitballFiller : public Filler<Dtype> {
  public:
@@ -80,7 +105,7 @@ class PositiveUnitballFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     Dtype* data = blob->mutable_cpu_data();
     DCHECK(blob->count());
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(), 0, 1);
+    caffe_rng_uniform<Dtype>(blob->count(), 0, 1, blob->mutable_cpu_data());
     // We expect the filler to not be called very frequently, so we will
     // just use a simple implementation
     int dim = blob->count() / blob->num();
@@ -94,17 +119,27 @@ class PositiveUnitballFiller : public Filler<Dtype> {
       data[i * dim + j] /= sum;
     }
   }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
-// A filler based on the paper [Bengio and Glorot 2010]: Understanding
-// the difficulty of training deep feedforward neuralnetworks, but does not
-// use the fan_out value.
-//
-// It fills the incoming matrix by randomly sampling uniform data from
-// [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
-// of input nodes. You should make sure the input blob has shape (num, a, b, c)
-// where a * b * c = fan_in.
+/**
+ * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
+ *        set inversely proportional to the number of incoming nodes, outgoing
+ *        nodes, or their average.
+ *
+ * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
+ * the difficulty of training deep feedforward neural networks.
+ *
+ * It fills the incoming matrix by randomly sampling uniform data from
+ * [-scale, scale], where scale = sqrt(3 / n) and n is the fan_in, fan_out, or
+ * their average, depending on the variance_norm option. You should make sure
+ * the input blob has shape (num, a, b, c) where a * b * c = fan_in and
+ * num * b * c = fan_out. Note that this is currently not the case for inner
+ * product layers.
+ *
+ * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
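+ *
+ * For example, a convolution weight blob of shape (256, 128, 3, 3) gives
+ * fan_in = 128 * 3 * 3 = 1152 and fan_out = 256 * 3 * 3 = 2304, so the
+ * default FAN_IN setting yields scale = sqrt(3 / 1152), roughly 0.051.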
+ */
 template <typename Dtype>
 class XavierFiller : public Filler<Dtype> {
  public:
@@ -113,16 +148,125 @@ class XavierFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
     int fan_in = blob->count() / blob->num();
-    Dtype scale = sqrt(Dtype(3) / fan_in);
-    caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
-        -scale, scale);
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype scale = sqrt(Dtype(3) / n);
+    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
+        blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
   }
 };
 
+/**
+ * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
+ *        @f$ \sigma^2 @f$ is set inversely proportional to the number of
+ *        incoming nodes, outgoing nodes, or their average.
+ *
+ * A Filler based on the paper [He, Zhang, Ren and Sun 2015], which
+ * specifically accounts for ReLU nonlinearities.
+ *
+ * Aside: for another perspective on the scaling factor, see the derivation of
+ * [Saxe, McClelland, and Ganguli 2013 (v3)].
+ *
+ * It fills the incoming matrix by randomly sampling Gaussian data with std =
+ * sqrt(2 / n), where n is the fan_in, fan_out, or their average, depending on
+ * the variance_norm option. You should make sure the input blob has shape
+ * (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that
+ * this is currently not the case for inner product layers.
+ */
+template <typename Dtype>
+class MSRAFiller : public Filler<Dtype> {
+ public:
+  explicit MSRAFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    CHECK(blob->count());
+    int fan_in = blob->count() / blob->num();
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype std = sqrt(Dtype(2) / n);
+    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
+        blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
+  }
+};
+
+/*!
+@brief Fills a Blob with coefficients for bilinear interpolation.
+
+A common use case is with the DeconvolutionLayer acting as upsampling.
+You can upsample a feature map with shape (B, C, H, W) by any integer factor
+using the following proto.
+\code
+layer {
+  name: "upsample", type: "Deconvolution"
+  bottom: "{{bottom_name}}" top: "{{top_name}}"
+  convolution_param {
+    kernel_size: {{2 * factor - factor % 2}} stride: {{factor}}
+    num_output: {{C}} group: {{C}}
+    pad: {{ceil((factor - 1) / 2.)}}
+    weight_filler: { type: "bilinear" } bias_term: false
+  }
+  param { lr_mult: 0 decay_mult: 0 }
+}
+\endcode
+Replace the `{{}}` placeholders with your values. By specifying
+`num_output: {{C}} group: {{C}}`, the deconvolution behaves as a channel-wise
+convolution: the filter shape will be (C, 1, K, K) where K is `kernel_size`,
+and this filler sets the same (K, K) interpolation kernel for every channel of
+the filter. The resulting shape of the top feature map will be
+(B, C, factor * H, factor * W).
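+For example, factor = 2 gives kernel_size: 4, stride: 2, pad: 1; the resulting
+(4, 4) kernel is the outer product of (0.25, 0.75, 0.75, 0.25) with itself.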
+Note that the learning rate and the weight decay are set to 0 in order to keep
+the bilinear interpolation coefficients unchanged during training. If you apply
+this to an image, this operation is equivalent to the following call in Python
+with scikit-image.
+\code{.py}
+out = skimage.transform.rescale(img, factor, mode='constant', cval=0)
+\endcode
+ */
+template <typename Dtype>
+class BilinearFiller : public Filler<Dtype> {
+ public:
+  explicit BilinearFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim.";
+    CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
+    Dtype* data = blob->mutable_cpu_data();
+    int f = ceil(blob->width() / 2.);
+    float c = (2 * f - 1 - f % 2) / (2. * f);
+    for (int i = 0; i < blob->count(); ++i) {
+      float x = i % blob->width();
+      float y = (i / blob->width()) % blob->height();
+      data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
+    }
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+         << "Sparsity not supported by this Filler.";
+  }
+};
 
-// A function to get a specific filler from the specification given in
-// FillerParameter. Ideally this would be replaced by a factory pattern,
-// but we will leave it this way for now.
+/**
+ * @brief Get a specific filler from the specification given in FillerParameter.
+ *
+ * Ideally this would be replaced by a factory pattern, but we will leave it
+ * this way for now.
+ */
 template <typename Dtype>
 Filler<Dtype>* GetFiller(const FillerParameter& param) {
   const std::string& type = param.type();
@@ -136,6 +280,10 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
     return new UniformFiller<Dtype>(param);
   } else if (type == "xavier") {
     return new XavierFiller<Dtype>(param);
+  } else if (type == "msra") {
+    return new MSRAFiller<Dtype>(param);
+  } else if (type == "bilinear") {
+    return new BilinearFiller<Dtype>(param);
   } else {
     CHECK(false) << "Unknown filler name: " << param.type();
   }
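
A filler is typically constructed through GetFiller and applied to a blob that
has already been shaped by its owning layer. The snippet below is a minimal
usage sketch, not part of the diff above; the InitWeights wrapper and the
"msra" choice are illustrative assumptions.

  #include <boost/shared_ptr.hpp>
  #include "caffe/filler.hpp"

  // Hypothetical helper: fill an already-shaped weight blob in place.
  void InitWeights(caffe::Blob<float>* weights) {
    caffe::FillerParameter param;
    param.set_type("msra");  // any registered type, e.g. "gaussian", "xavier"
    boost::shared_ptr<caffe::Filler<float> > filler(
        caffe::GetFiller<float>(param));
    filler->Fill(weights);  // runs on the CPU only
  }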