From 59de6c7190eb57bfa7511ede30c965c1d5963a06 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Tue, 26 May 2015 12:39:14 -0700 Subject: [PATCH] include comment on Saxe and sqrt(2) scaling factor Although different and independent, the derivation of Saxe et al. with regard to the scaling factor might be of interest. --- include/caffe/filler.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 0125b30..446f5b5 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -175,6 +175,9 @@ class XavierFiller : public Filler { * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically * accounts for ReLU nonlinearities. * + * Aside: for another perspective on the scaling factor, see the derivation of + * [Saxe, McClelland, and Ganguli 2013 (v3)]. + * * It fills the incoming matrix by randomly sampling Gaussian data with std = * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on * the variance_norm option. You should make sure the input blob has shape (num, -- 2.7.4