From 31109c8000ff1e98f3a3f2028f1468348412df82 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Mon, 16 Mar 2020 12:44:59 +0900
Subject: [PATCH] Add weight initialization methods

Add the following weight initialization methods:
  "lecun_normal"   : LeCun Normal Initialization
  "lecun_uniform"  : LeCun Uniform Initialization
  "xavier_normal"  : Xavier Normal Initialization
  "xavier_uniform" : Xavier Uniform Initialization
  "he_normal"      : He Normal Initialization
  "he_uniform"     : He Uniform Initialization

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 include/layers.h    | 34 +++++++++++++++++++++----
 include/neuralnet.h | 11 ++++++--
 src/layers.cpp      | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/neuralnet.cpp   | 28 +++++++++++++++++---
 4 files changed, 129 insertions(+), 17 deletions(-)

diff --git a/include/layers.h b/include/layers.h
index 179dbf5..3c5c0de 100644
--- a/include/layers.h
+++ b/include/layers.h
@@ -69,6 +69,25 @@ typedef enum { ACT_TANH, ACT_SIGMOID, ACT_RELU, ACT_UNKNOWN } acti_type;
 typedef enum { LAYER_IN, LAYER_FC, LAYER_OUT, LAYER_BN, LAYER_UNKNOWN } layer_type;
 
 /**
+ * @brief Enumeration of Weight Initialization Type
+ *        0. WEIGHT_LECUN_NORMAL ( LeCun normal initialization )
+ *        1. WEIGHT_LECUN_UNIFORM ( LeCun uniform initialization )
+ *        2. WEIGHT_XAVIER_NORMAL ( Xavier normal initialization )
+ *        3. WEIGHT_XAVIER_UNIFORM ( Xavier uniform initialization )
+ *        4. WEIGHT_HE_NORMAL ( He normal initialization )
+ *        5. WEIGHT_HE_UNIFORM ( He uniform initialization )
+ */
+typedef enum {
+  WEIGHT_LECUN_NORMAL,
+  WEIGHT_LECUN_UNIFORM,
+  WEIGHT_XAVIER_NORMAL,
+  WEIGHT_XAVIER_UNIFORM,
+  WEIGHT_HE_NORMAL,
+  WEIGHT_HE_UNIFORM,
+  WEIGHT_UNKNOWN
+} weightIni_type;
+
+/**
  * @brief type for the Optimizor to save hyper-parameter
  */
 typedef struct {
@@ -123,8 +142,9 @@ class Layer {
    * @param[in] w Width
    * @param[in] id index of this layer
    * @param[in] init_zero Bias initialization with zero
+   * @param[in] wini Weight Initialization Scheme
    */
-  virtual void initialize(int b, int h, int w, int id, bool init_zero) = 0;
+  virtual void initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini) = 0;
 
   /**
    * @brief read layer Weight & Bias data from file
@@ -282,8 +302,9 @@ class InputLayer : public Layer {
    * @param[in] w width
    * @param[in] id index of this layer
    * @param[in] init_zero boolean to set Bias zero
+   * @param[in] wini Weight Initialization Scheme
    */
-  void initialize(int b, int h, int w, int id, bool init_zero);
+  void initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini);
 
   /**
    * @brief Copy Layer
@@ -380,8 +401,9 @@ class FullyConnectedLayer : public Layer {
    * @param[in] w width
    * @param[in] id layer index
    * @param[in] init_zero boolean to set Bias zero
+   * @param[in] wini Weight Initialization Scheme
    */
-  void initialize(int b, int h, int w, int id, bool init_zero);
+  void initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini);
 
  private:
   Tensor Weight;
@@ -467,8 +489,9 @@ class OutputLayer : public Layer {
    * @param[in] w width
    * @param[in] id layer index
    * @param[in] init_zero boolean to set Bias zero
+   * @param[in] wini Weight Initialization Scheme
    */
-  void initialize(int b, int w, int h, int id, bool init_zero);
+  void initialize(int b, int w, int h, int id, bool init_zero, weightIni_type wini);
 
   /**
    * @brief get Loss value
@@ -577,8 +600,9 @@ class BatchNormalizationLayer : public Layer {
    * @param[in] w width
    * @param[in] id layer index
   * @param[in] init_zero boolean to set Bias zero
+   * @param[in] wini Weight Initialization Scheme
    */
-  void initialize(int b, int h, int w, int id, bool init_zero);
+  void initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini);
 
  private:
   Tensor Weight;
diff --git a/include/neuralnet.h b/include/neuralnet.h
index 90a028f..fa226dc 100644
--- a/include/neuralnet.h
+++ b/include/neuralnet.h
@@ -51,9 +51,11 @@ typedef enum { NET_KNN, NET_REG, NET_NEU, NET_UNKNOWN } net_type;
  * 2. NET ( Network Token )
  * 3. ACTI ( Activation Token )
  * 4. LAYER ( Layer Token )
- * 5. UNKNOWN
+ * 5. WEIGHTINI ( Weight Initialization Token )
+ * 6. UNKNOWN
  */
-typedef enum { TOKEN_OPT, TOKEN_COST, TOKEN_NET, TOKEN_ACTI, TOKEN_LAYER, TOKEN_UNKNOWN } input_type;
+typedef enum { TOKEN_OPT, TOKEN_COST, TOKEN_NET, TOKEN_ACTI, TOKEN_LAYER, TOKEN_WEIGHTINI, TOKEN_UNKNOWN } input_type;
+
 
 /**
  * @class NeuralNetwork Class
@@ -214,6 +216,11 @@ class NeuralNetwork {
   Layers::cost_type cost;
 
   /**
+   * @brief Weight Initialization type
+   */
+  Layers::weightIni_type weightini;
+
+  /**
    * @brief Model path to save or read
    */
   std::string model;
diff --git a/src/layers.cpp b/src/layers.cpp
index 5010841..3a4e148 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -23,6 +23,7 @@
 #include "include/layers.h"
 #include
+#include <random>
 
 /**
  * @brief random function
 */
@@ -93,6 +94,63 @@ float ReluPrime(float x) {
   }
 }
+static void WeightInitialization(Tensor W, unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
+  std::random_device rd;
+  std::mt19937 gen(rd());
+
+  switch (init_type) {
+    case Layers::WEIGHT_LECUN_NORMAL: {
+      std::normal_distribution<float> dist(0, sqrt(1.0 / height));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    case Layers::WEIGHT_LECUN_UNIFORM: {
+      std::uniform_real_distribution<float> dist(-1.0 * sqrt(1.0 / height), sqrt(1.0 / height));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    case Layers::WEIGHT_XAVIER_NORMAL: {
+      std::normal_distribution<float> dist(0, sqrt(2.0 / (width + height)));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    case Layers::WEIGHT_XAVIER_UNIFORM: {
+      std::uniform_real_distribution<float> dist(-1.0 * sqrt(6.0 / (height + width)), sqrt(6.0 / (height + width)));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    case Layers::WEIGHT_HE_NORMAL: {
+      std::normal_distribution<float> dist(0, sqrt(2.0 / (height)));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    case Layers::WEIGHT_HE_UNIFORM: {
+      std::uniform_real_distribution<float> dist(-1.0 * sqrt(6.0 / (height)), sqrt(6.0 / (height)));
+      for (unsigned int i = 0; i < width; ++i)
+        for (unsigned int j = 0; j < height; ++j) {
+          float f = dist(gen);
+          W.setValue(0, j, i, f);
+        }
+    } break;
+    default:
+      break;
+  }
+}
 
 namespace Layers {
 
 void InputLayer::setOptimizer(Optimizer opt) {
@@ -132,7 +190,7 @@ Tensor InputLayer::forwarding(Tensor input) {
   return Input;
 }
 
-void InputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
+void InputLayer::initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini) {
   this->batch = b;
   this->width = w;
   this->height = h;
@@ -140,7 +198,7 @@ void InputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
   this->bnfallow = false;
 }
 
-void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero) {
+void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini) {
   this->batch = b;
   this->width = w;
   this->height = h;
@@ -151,7 +209,8 @@ void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero
   Weight = Tensor(h, w);
   Bias = Tensor(1, w);
 
-  Weight = Weight.applyFunction(random);
+  WeightInitialization(Weight, w, h, wini);
+
   if (init_zero) {
     Bias.setZero();
   } else {
@@ -259,7 +318,7 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   return ret;
 }
 
-void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
+void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini) {
   this->batch = b;
   this->width = w;
   this->height = h;
@@ -270,7 +329,9 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero
   this->cost = cost;
   this->bnfallow = false;
 
-  Weight = Weight.applyFunction(random);
+  // Weight = Weight.applyFunction(random);
+  WeightInitialization(Weight, w, h, wini);
+
   if (init_zero) {
     Bias.setZero();
   } else {
@@ -477,7 +538,7 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   return ret;
 }
 
-void BatchNormalizationLayer::initialize(int b, int h, int w, int id, bool init_zero) {
+void BatchNormalizationLayer::initialize(int b, int h, int w, int id, bool init_zero, weightIni_type wini) {
   this->batch = b;
   this->width = w;
   this->height = h;
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index 3d9de1f..9e21407 100644
--- a/src/neuralnet.cpp
+++ b/src/neuralnet.cpp
@@ -93,6 +93,17 @@ std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
 std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer"};
 
 /**
+ * @brief Weight Initialization Type String from configure file
+ *        "lecun_normal"   : LeCun Normal Initialization
+ *        "lecun_uniform"  : LeCun Uniform Initialization
+ *        "xavier_normal"  : Xavier Normal Initialization
+ *        "xavier_uniform" : Xavier Uniform Initialization
+ *        "he_normal"      : He Normal Initialization
+ *        "he_uniform"     : He Uniform Initialization
+ */
+ std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+
+/**
  * @brief Check Existance of File
  * @param[in] filename file path to check
  * @retval boolean true if exists
@@ -171,6 +182,14 @@ unsigned int parseType(std::string ll, input_type t) {
       }
       ret = i - 1;
       break;
+    case TOKEN_WEIGHTINI:
+      for (i = 0; i < weightini_string.size(); i++) {
+        if (caseInSensitiveCompare(weightini_string[i], ll)) {
+          return (i);
+        }
+      }
+      ret = i - 1;
+      break;
     case TOKEN_UNKNOWN:
     default:
       ret = 3;
@@ -209,6 +228,7 @@ void NeuralNetwork::init() {
   opt.type = (Layers::opt_type)parseType(iniparser_getstring(ini, "Network:Optimizer", NULL), TOKEN_OPT);
   opt.activation = (Layers::acti_type)parseType(iniparser_getstring(ini, "Network:Activation", NULL), TOKEN_ACTI);
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
+  weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
   batchsize = iniparser_getint(ini, "Network:minibatch", 1);
@@ -256,7 +276,7 @@ void NeuralNetwork::init() {
     case Layers::LAYER_IN: {
       Layers::InputLayer *inputlayer = new (Layers::InputLayer);
       inputlayer->setType(t);
-      inputlayer->initialize(batchsize, 1, HiddenSize[i], id, b_zero);
+      inputlayer->initialize(batchsize, 1, HiddenSize[i], id, b_zero, weightini);
       inputlayer->setOptimizer(opt);
       inputlayer->setNormalization(iniparser_getboolean(ini, (layers_name[i] + ":Normalization").c_str(), false));
       inputlayer->setStandardization(iniparser_getboolean(ini, (layers_name[i] + ":Standardization").c_str(), false));
@@ -265,14 +285,14 @@ void NeuralNetwork::init() {
     case Layers::LAYER_FC: {
       Layers::FullyConnectedLayer *fclayer = new (Layers::FullyConnectedLayer);
       fclayer->setType(t);
-      fclayer->initialize(batchsize, HiddenSize[i - 1], HiddenSize[i], id, b_zero);
+      fclayer->initialize(batchsize, HiddenSize[i - 1], HiddenSize[i], id, b_zero, weightini);
       fclayer->setOptimizer(opt);
       layers.push_back(fclayer);
     } break;
     case Layers::LAYER_OUT: {
       Layers::OutputLayer *outputlayer = new (Layers::OutputLayer);
       outputlayer->setType(t);
-      outputlayer->initialize(batchsize, HiddenSize[i - 1], HiddenSize[i], id, b_zero);
+      outputlayer->initialize(batchsize, HiddenSize[i - 1], HiddenSize[i], id, b_zero, weightini);
       outputlayer->setOptimizer(opt);
       outputlayer->setCost(cost);
       outputlayer->setSoftmax(iniparser_getboolean(ini, (layers_name[i] + ":Softmax").c_str(), false));
@@ -282,7 +302,7 @@
       Layers::BatchNormalizationLayer *bnlayer = new (Layers::BatchNormalizationLayer);
       bnlayer->setType(t);
       bnlayer->setOptimizer(opt);
-      bnlayer->initialize(batchsize, 1, HiddenSize[i], id, b_zero);
+      bnlayer->initialize(batchsize, 1, HiddenSize[i], id, b_zero, weightini);
       layers.push_back(bnlayer);
       layers[i - 1]->setBNfallow(true);
     } break;
-- 
2.7.4
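
Note on usage and formulas (illustration only, not part of the patch): the scheme is
selected in the configuration file through the "Network:WeightIni" key and falls back
to "xavier_normal" when the key is absent. The standalone sketch below mirrors the
per-scheme parameters used by WeightInitialization() above, independent of the Tensor
class; the names init_weights, fan_in and fan_out are hypothetical and assume
fan_in = height and fan_out = width, matching the (w, h) call order in the patch.

// Minimal sketch of the initialization formulas; assumptions as stated above.
#include <cmath>
#include <iostream>
#include <random>
#include <vector>

enum class WeightIni { LECUN_NORMAL, LECUN_UNIFORM, XAVIER_NORMAL, XAVIER_UNIFORM, HE_NORMAL, HE_UNIFORM };

// Fill a fan_in x fan_out weight matrix (flattened row-major) with the chosen scheme.
std::vector<float> init_weights(unsigned fan_in, unsigned fan_out, WeightIni type) {
  std::mt19937 gen(std::random_device{}());
  std::vector<float> w(fan_in * fan_out);

  // Draw every weight from N(0, stddev^2).
  auto fill_normal = [&](double stddev) {
    std::normal_distribution<float> dist(0.0f, static_cast<float>(stddev));
    for (auto &v : w) v = dist(gen);
  };
  // Draw every weight from U(-bound, bound).
  auto fill_uniform = [&](double bound) {
    std::uniform_real_distribution<float> dist(-bound, bound);
    for (auto &v : w) v = dist(gen);
  };

  switch (type) {
    case WeightIni::LECUN_NORMAL:   fill_normal(std::sqrt(1.0 / fan_in)); break;
    case WeightIni::LECUN_UNIFORM:  fill_uniform(std::sqrt(1.0 / fan_in)); break;
    case WeightIni::XAVIER_NORMAL:  fill_normal(std::sqrt(2.0 / (fan_in + fan_out))); break;
    case WeightIni::XAVIER_UNIFORM: fill_uniform(std::sqrt(6.0 / (fan_in + fan_out))); break;
    case WeightIni::HE_NORMAL:      fill_normal(std::sqrt(2.0 / fan_in)); break;
    case WeightIni::HE_UNIFORM:     fill_uniform(std::sqrt(6.0 / fan_in)); break;
  }
  return w;
}

int main() {
  // e.g. a 784-input / 128-output fully connected layer with He normal initialization
  std::vector<float> w = init_weights(784, 128, WeightIni::HE_NORMAL);
  std::cout << "first weight: " << w[0] << "\n";
}

For the Xavier and He schemes the uniform bound sqrt(6/n) gives the same weight variance
as the corresponding normal scheme, since a uniform distribution on (-a, a) has variance
a^2 / 3.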