Rework bn layer forward & backward pass and fix a few bugs.
This patch only covers the training passes (inference forwarding is still NYI).
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
private:
Tensor weight;
Tensor bias;
- Tensor mu;
- Tensor var;
+
+  Tensor mu;  /**< moving mean used for inference.
+                   updated as momentum * mu + (1 - momentum) * (mu of the
+                   current batch) */
+  Tensor var; /**< moving variance used for inference.
+                   updated as momentum * var + (1 - momentum) * (var of the
+                   current batch) */
+
+  Tensor cvar; /**< variance of the current batch (plus epsilon), saved in
+                    bn_layer::forwarding and used in bn_layer::backwarding */
Tensor gamma;
Tensor beta;
+ Tensor x_normalized;
float epsilon;
};
} // namespace nntrainer
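For reference, the moving statistics kept in mu and var are plain exponential moving averages of the per-batch statistics. A minimal NumPy sketch of that update, assuming the fixed momentum of 0.9 that the forward pass below hard-codes (update_moving_stats is a hypothetical helper, not part of this patch):

import numpy as np

def update_moving_stats(mu, var, cmu, cvar, momentum=0.9):
    # cmu / cvar: mean and variance of the current batch
    mu = momentum * mu + (1.0 - momentum) * cmu
    var = momentum * var + (1.0 - momentum) * cvar
    return mu, var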
#include <assert.h>
#include <bn_layer.h>
#include <layer.h>
+#include <lazy_tensor.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <parse_util.h>
namespace nntrainer {
+/// @todo add channel-wise bn for convolutional layers.
int BatchNormalizationLayer::initialize(bool last) {
int status = ML_ERROR_NONE;
dim = input_dim;
- output_dim = dim;
+ dim.batch(1);
+ output_dim = input_dim;
- this->gamma = Tensor(dim.channel(), dim.batch(), dim.width());
- this->beta = Tensor(dim.channel(), dim.batch(), dim.width());
- beta.setZero();
+ this->mu = Tensor(dim);
+ this->var = Tensor(dim);
+ this->gamma = Tensor(dim);
+ this->beta = Tensor(dim);
+
+ mu.setZero();
+ var.setValue(1);
gamma.setZero();
+ beta.setZero();
+
+ weights.clear();
+ weights.push_back(gamma);
+ weights.push_back(beta);
return status;
}
int BatchNormalizationLayer::setOptimizer(Optimizer &opt) {
this->opt.setType(opt.getType());
this->opt.setOptParam(opt.getOptParam());
-
- this->epsilon = 0.0;
return this->opt.initialize(dim, false);
}
NN_RETURN_STATUS();
unsigned int type = parseLayerProperty(key);
-
switch (static_cast<PropertyType>(type)) {
case PropertyType::epsilon:
status = setFloat(epsilon, value);
}
Tensor BatchNormalizationLayer::forwarding(Tensor in, int &status) {
- Tensor temp;
- assert(dim.batch() > 0);
- hidden = in;
- mu = in.sum(0).multiply(1.0 / dim.batch());
+ if (trainable) {
+ Tensor deviation;
+ this->input = in;
+
+    /// mu / var of the current batch
+ Tensor cmu;
- temp = in.subtract(mu);
+ cmu = in.average(0);
- var = temp.multiply(temp).sum(0).multiply(1.0 / dim.batch());
+ deviation = in.subtract(cmu);
- Tensor hath = temp.divide(var.add(0.001).apply(sqrtFloat));
+ this->cvar = deviation.chain()
+ .multiply_i(deviation)
+ .sum(0)
+ .multiply_i(1.0 / input_dim.batch())
+ .add_i(epsilon)
+ .run();
- hidden = hath;
+    /// @todo replace the hard-coded momentum with a layer property
+ float momentum = 0.9;
+ this->mu.multiply_i(momentum);
+ this->mu.add_i(cmu, 1 - momentum);
+ this->var.multiply_i(momentum);
+ this->var.add_i(cvar, 1 - momentum);
- Tensor ret = hath.multiply(gamma).add(beta);
+ this->x_normalized = deviation.divide(cvar.apply(sqrtFloat));
- status = ML_ERROR_NONE;
- return ret;
+ this->hidden = x_normalized.chain().multiply_i(gamma).add_i(beta).run();
+
+ status = ML_ERROR_NONE;
+ } else {
+ /// NYI
+ status = ML_ERROR_NOT_SUPPORTED;
+ throw std::runtime_error("not_yet_implemented");
+ }
+ return hidden;
}
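As a cross-check against the chained tensor ops above, a minimal NumPy sketch of the training-time forward pass (per-feature statistics over the batch axis; epsilon is folded into cvar exactly as add_i(epsilon) does; bn_forward_train is a hypothetical reference, not part of this patch):

import numpy as np

def bn_forward_train(x, gamma, beta, epsilon=0.001):
    cmu = x.mean(axis=0)                            # mean of the current batch
    deviation = x - cmu
    cvar = (deviation ** 2).mean(axis=0) + epsilon  # variance of the current batch + eps
    x_normalized = deviation / np.sqrt(cvar)
    hidden = gamma * x_normalized + beta
    return hidden, x_normalized, cvar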
-Tensor BatchNormalizationLayer::backwarding(Tensor derivative, int iteration) {
+Tensor BatchNormalizationLayer::backwarding(Tensor dy, int iteration) {
Tensor dbeta;
Tensor dgamma;
- assert(dim.batch() > 0);
+ Tensor dx_normalized;
- Tensor hath = hidden;
- Tensor dy = derivative.multiply(hath.multiply(gamma).add(beta));
+ Tensor dx;
+ int batch = dy.batch();
+
+ dgamma = x_normalized.multiply(dy).sum(0);
dbeta = dy.sum(0);
- dgamma = (input.subtract(mu)
- .divide(var.add(0.001).apply(sqrtFloat))
- .multiply(dy)
- .sum(0));
-
- Tensor Temp =
- (dy.multiply(dim.batch()).subtract(dy.sum(0)))
- .subtract(input.subtract(mu)
- .divide(var.add(0.001))
- .multiply(dy.multiply(input.subtract(mu)).sum(0)));
- Tensor dh = Temp.multiply(1.0 / dim.batch())
- .multiply(var.add(0.001).apply(sqrtFloat))
- .multiply(gamma);
-
- float ll = opt.getLearningRate();
- if (opt.getDecaySteps() != -1) {
- ll = ll * pow(opt.getDecayRate(), (iteration / opt.getDecaySteps()));
- }
- gamma = gamma.subtract(dgamma.multiply(ll));
- beta = beta.subtract(dbeta.multiply(ll));
+ dx_normalized = dy.multiply(gamma);
+
+  /// dx = gamma * (N * dy - sum(dy) - x_hat * sum(dy * x_hat)) / (N * sqrt(var + eps))
+  dx = dx_normalized.chain()
+         .multiply_i(batch)
+         .subtract_i(dx_normalized.sum(0))
+         .subtract_i(
+           x_normalized.multiply(dx_normalized.multiply(x_normalized).sum(0)))
+         .divide_i(cvar.apply(sqrtFloat).multiply(batch))
+         .run();
+
+ gradients.clear();
+ gradients.push_back(dgamma);
+ gradients.push_back(dbeta);
+
+ opt.apply_gradients(weights, gradients, iteration);
- return dh;
+ return dx;
}
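And the matching backward pass: the chain above computes the standard batch-norm input gradient, dx = gamma * (N * dy - sum(dy) - x_hat * sum(dy * x_hat)) / (N * sqrt(var + eps)), with cvar already holding var + eps. A hypothetical NumPy reference for eyeballing the tensor ops, not part of this patch:

import numpy as np

def bn_backward_train(dy, x_normalized, cvar, gamma):
    n = dy.shape[0]
    dgamma = (x_normalized * dy).sum(axis=0)
    dbeta = dy.sum(axis=0)
    dx_normalized = dy * gamma
    dx = (n * dx_normalized
          - dx_normalized.sum(axis=0)
          - x_normalized * (dx_normalized * x_normalized).sum(axis=0))
    dx = dx / (n * np.sqrt(cvar))
    return dx, dgamma, dbeta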
void BatchNormalizationLayer::read(std::ifstream &file) {
- file.read((char *)&mu, sizeof(float));
- file.read((char *)&var, sizeof(float));
+ mu.read(file);
+ var.read(file);
gamma.read(file);
beta.read(file);
}
void BatchNormalizationLayer::save(std::ofstream &file) {
- file.write((char *)&mu, sizeof(float));
- file.write((char *)&var, sizeof(float));
+ mu.save(file);
+ var.save(file);
gamma.save(file);
beta.save(file);
}
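read/save serialize the four tensors in a fixed order (mu, var, gamma, beta), which is also the order the golden BNLayerWeights.in file is written in by the generator below. A hypothetical Python helper for inspecting such a file, assuming each tensor shares the layer's feature shape:

import numpy as np

def load_bn_weights(path, feature_shape):
    # file layout: mu, var, gamma, beta, each of feature_shape, float32
    raw = np.fromfile(path, dtype=np.float32)
    return raw.reshape((4,) + tuple(feature_shape))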
this->hidden.copy(from->hidden);
this->weight.copy(from->weight);
this->bias.copy(from->bias);
- this->mu = from->mu;
- this->var = from->var;
+ this->mu.copy(from->mu);
+ this->var.copy(from->var);
+ this->cvar.copy(from->cvar);
this->gamma.copy(from->gamma);
this->beta.copy(from->beta);
}
##
# @brief save data into file with filename
# @param[in] data The data to be saved
-def save(filename, data):
+def save(filename, *data):
if os.path.isfile(filename):
os.remove(filename)
with open(filename, 'ab') as outfile:
- np.array(data, dtype=np.float32).tofile(outfile)
- print(data.shape, " data is generated")
+        for item in data:
+            item = np.array(item, dtype=np.float32)
+            item.tofile(outfile)
+            print(item.shape, " data is generated")
##
# @brief generate random tensor
-def gen_tensor(shape, dtype=None):
- return np.random.random_sample(input_shape)
+def gen_tensor(shape, dtype=dtypes.float32):
+ return np.random.random_sample(shape)
##
# @brief generate random data and save
# tested with tf 1.14.0
# @param[in] x input
# @param[in] trainable
-# @return bn output, [updated_gamma, updated_beta], grad_result (0. dx / 1. gamma / 2. beta / 3. mean / 4. variance)
+# @return input_variables, output_variables (0. bn output / 1. updated_gamma / 2. updated_beta), grad_result (0. dx / 1. gamma / 2. beta / 3. mean / 4. variance)
# for updated_gamma, updated_beta, x <- x - grad is used for easier calculation
-def bn_tf(x, trainable=False):
+def bn_tf(x, *, trainable=True, init_beta=gen_tensor, init_gamma=gen_tensor, axis=[1, 2, 3]):
tf.compat.v1.reset_default_graph()
tf_input = tf.compat.v1.placeholder(
dtype=dtypes.float32, shape=x.shape, name='input')
bnlayer = tf.keras.layers.BatchNormalization(
- axis=0,
+ axis=axis,
trainable=trainable,
+ momentum=1.0,
-        gamma_initializer=gen_tensor,
-        beta_initializer=gen_tensor)(tf_input)
+        gamma_initializer=init_gamma,
+        beta_initializer=init_beta)(tf_input)
- bn_variables = tf.compat.v1.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
+ bn_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
scope='batch_normalization')
+
input_variables = [tf_input] + bn_variables
grad = tf.gradients(bnlayer, input_variables)
+ f_dict = {tf_input: x, tf.keras.backend.learning_phase(): trainable}
+
with tf.compat.v1.Session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
- bn_result = sess.run(bnlayer, feed_dict={tf_input: x})
- grad_result = sess.run(grad, feed_dict={tf_input: x})
+
+ old_var = sess.run(input_variables, feed_dict=f_dict)
+ bn_result = sess.run(bnlayer, feed_dict=f_dict)
+ grad_result = sess.run(grad, feed_dict=f_dict)
+
updated_gamma = sess.run(input_variables[1] - grad_result[1])
- updated_beta = sess.run(input_variables[1] - grad_result[2])
+ updated_beta = sess.run(input_variables[2] - grad_result[2])
+
+ output_variables = [bn_result, updated_gamma, updated_beta]
if DEBUG:
- print(x[0], bn_result[0])
- print("updated_gamma: %s" % updated_gamma)
- print("updated_beta: %s" % updated_beta)
- for item, input_variable in zip(grad_result, input_variables):
- print(input_variable.name)
- print(item[0])
+ print("======================================")
+ print("Input:\n %s\n Output:\n %s" % (x[0], bn_result[0]))
+ print("dx: %s" % grad_result[0][0][0])
+ print("gradient of gamma: %s" % grad_result[1][0][0], grad_result[1].shape)
+ print("gradient of beta: %s" % grad_result[2][0][0], grad_result[2].shape)
+ print("======================================")
+
+ return old_var, output_variables, grad_result
- return bn_result, [updated_gamma, updated_beta], grad_result
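For clarity, bn_tf now returns three values: the evaluated input_variables ([0] input, [1] gamma, [2] beta, [3] moving mean, [4] moving variance), output_variables ([0] bn result, [1] updated gamma, [2] updated beta), and grad_result indexed like input_variables. A short usage sketch (shapes are just the ones used by test case 5 below):

iv, ov, grad = bn_tf(np.random.random_sample([3, 1, 4, 5]))
gamma, beta = iv[1], iv[2]
moving_mean, moving_variance = iv[3], iv[4]
bn_out, updated_gamma, updated_beta = ov
dx, dgamma, dbeta = grad[0], grad[1], grad[2]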
def gen_test_case_conv(i_b, i_c, i_h, i_w, k_c, k_h, k_w, padding, stride, bias, base_name):
x=gen_input(base_name+"conv2DLayer.in", [i_b, i_c, i_h, i_w])
golden_fc = fc_tf(input_data, kernel, None, bias, activation=tf.nn.softmax)
save(base_name + "goldenFCResultSoftmax.out", golden_fc[0])
-def get_test_case_bn(input_shape, training=False):
- pass
+def gen_test_case_bn(input_shape, base_name, training=True):
+ input_data = gen_input(base_name + "BNLayerInput.in", input_shape)
+
+ input_variables, output_variables, grad = bn_tf(input_data)
+
+ # mu / var / gamma / beta
+ save(base_name + "BNLayerWeights.in", input_variables[3], input_variables[4], input_variables[1], input_variables[2])
+ save(base_name + "goldenBNResultForward.out", output_variables[0])
+ # todo: change 0 to initial moving avg / std in case of training
+ save(base_name + "goldenBNLayerAfterUpdate.out", 0, 0, output_variables[1], output_variables[2])
+ save(base_name + "goldenBNLayerBackwardDx.out", grad[0])
+
if __name__ == "__main__":
target = int(sys.argv[1])
gen_test_case_fc(input_shape = [3, 1, 1, 12],
kernel_shape = [12, 15],
base_name = "test_1_")
+
+# Bn layer unit test case:
+ if target == 5:
+ gen_test_case_bn(input_shape = [3, 1, 4, 5], base_name = "test_5_")
EXPECT_EQ(status, ML_ERROR_NONE);
}
+class nntrainer_batchNormalizationLayer_TFmatch : public ::testing::Test {
+protected:
+ nntrainer_batchNormalizationLayer_TFmatch() {}
+
+ virtual void SetUp() {
+ std::vector<std::string> input_str;
+ input_str.push_back("input_shape=3:1:4:5");
+ input_str.push_back("epsilon=0.001");
+
+ nntrainer::Optimizer opt;
+ nntrainer::OptParam p;
+ p.learning_rate = 1;
+
+ status = opt.setType(nntrainer::OptType::sgd);
+ ASSERT_EQ(status, ML_ERROR_NONE);
+
+ status = opt.setOptParam(p);
+ ASSERT_EQ(status, ML_ERROR_NONE);
+
+ status = layer.setOptimizer(opt);
+ ASSERT_EQ(status, ML_ERROR_NONE);
+
+ status = layer.setProperty(input_str);
+ ASSERT_EQ(status, ML_ERROR_NONE);
+
+ status = layer.initialize(false);
+ ASSERT_EQ(status, ML_ERROR_NONE);
+
+ in = nntrainer::Tensor(3, 1, 4, 5);
+ expected = nntrainer::Tensor(3, 1, 4, 5);
+
+ loadFile("test_5_BNLayerInput.in", in);
+ loadFile("test_5_BNLayerWeights.in", layer);
+ }
+
+ void matchOutput(const nntrainer::Tensor &result, const char *path) {
+ loadFile(path, expected);
+ const float *out_ptr, *golden;
+
+ golden = expected.getData();
+ out_ptr = result.getData();
+
+ for (size_t i = 0; i < result.length(); ++i) {
+ EXPECT_NEAR(out_ptr[i], golden[i], tolerance);
+ }
+ }
+
+ int status;
+ nntrainer::BatchNormalizationLayer layer;
+ nntrainer::Tensor expected;
+ nntrainer::Tensor in;
+
+private:
+ template <typename T> void loadFile(const char *filename, T &t) {
+ std::ifstream file(filename);
+ if (!file.good()) {
+ throw std::runtime_error("filename is wrong");
+ }
+ t.read(file);
+ file.close();
+ }
+};
+
+TEST_F(nntrainer_batchNormalizationLayer_TFmatch,
+ forward_backward_training_01_p) {
+ int status = ML_ERROR_NONE;
+ layer.setTrainable(true);
+ nntrainer::Tensor forward_result = layer.forwarding(in, status);
+ EXPECT_EQ(status, ML_ERROR_NONE);
+
+ matchOutput(forward_result, "test_5_goldenBNResultForward.out");
+
+ nntrainer::Tensor backward_result =
+ layer.backwarding(constant(1.0, 3, 1, 4, 5), 1);
+
+ matchOutput(backward_result, "test_5_goldenBNLayerBackwardDx.out");
+}
+
/**
* @brief Convolution 2D Layer
*/