From aae17359d166eaf43eabffb3b27ab4418891d815 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Tue, 17 Mar 2020 09:40:34 +0900
Subject: [PATCH] Fix Weight Decay L2Norm

The L2-norm weight decay was implemented with the wrong sign: the decay term
was subtracted from the weight gradient instead of added. This patch fixes the
gradient computation in FullyConnectedLayer and OutputLayer, makes the
weight-decay configuration parsing consistent, refactors WeightInitialization
to construct and return the weight tensor, and updates the Classification
example configuration accordingly.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
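
Note: the L2 penalty adds (lambda/2) * ||W||^2 to the cost, so its gradient with
respect to W is +lambda * W, and that term must be added to dJdW; the previous
code subtracted it, pushing weights away from zero instead of shrinking them.
A minimal sketch of the corrected step, using only Tensor calls that appear in
this patch (add, multiply, subtract) and a hypothetical learning rate alpha
shown for illustration only:

    // dL/dW for L = J(W) + (lambda/2) * ||W||^2
    Tensor dJdW_reg = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
    // plain gradient-descent step; the real update goes through the configured
    // optimizer (sgd or adam), this only illustrates the sign convention
    Weight = Weight.subtract(dJdW_reg.multiply(alpha));
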
 Applications/Classification/res/Classification.ini | 27 ++++++++--------------
 src/layers.cpp                                      | 19 +++++++--------
 src/neuralnet.cpp                                   | 19 +++++++--------
 3 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/Applications/Classification/res/Classification.ini b/Applications/Classification/res/Classification.ini
index 7998ef0..f2c96e2 100644
--- a/Applications/Classification/res/Classification.ini
+++ b/Applications/Classification/res/Classification.ini
@@ -2,42 +2,35 @@
 [Network]
 Type = NeuralNetwork        # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
-         fc1layer \
          outputlayer        #Layers of Neuralnetwork
-Learning_rate = 0.001       # Learning Rate
+Learning_rate = 0.0001      # Learning Rate
 Decay_rate = 0.96           # for the decay_rate for the decayed learning rate
 Decay_steps = 1000          # decay step for the exponential decayed learning rate
 Epoch = 30000               # Epoch
-Optimizer = sgd             # Optimizer : sgd (stochastic gradien decent),
+Optimizer = adam            # Optimizer : sgd (stochastic gradien decent),
                             #             adam (Adamtive Moment Estimation)
 Activation = sigmoid        # activation : sigmoid, tanh
-Cost = cross                # Cost(loss) function : msr (mean square root error)
+Cost = cross                # Cost(loss) function : msr (mean square root error)
+Weight_Decay = l2norm
+weight_Decay_Lambda = 0.005
                             #                       categorical ( for logistic regression )
 Model = "model.bin"         # model path to save / read
 minibatch = 32              # mini batch size
 beta1 = 0.9                 # beta 1 for adam
 beta2 = 0.9999              # beta 2 for adam
-epsilon = 1e-8              # epsilon for adam
+epsilon = 1e-7              # epsilon for adam
 
 # Layer Section : Name
 [inputlayer]
 Type = InputLayer
 Id = 0                      # Layer Id
-Height = 1
-Width = 62720               # Input Layer Dimension
+HiddenSize = 62720          # Input Layer Dimension
 Bias_zero = true            # Zero Bias
-
-[fc1layer]
-Type = FullyConnectedLayer
-Id = 1
-Height = 62720              # Input Dimension ( = Weight Height )
-Width = 128                 # Hidden Layer Dimension ( = Weight Width )
-Bias_zero = true
+Normalization = true
 
 [outputlayer]
 Type = OutputLayer
 Id = 2
-Height = 128                # Hidden Layer Dimension ( = Weight Height )
-Width = 10                  # Output Layer Dimension ( = Weight Width )
+HiddenSize = 10             # Output Layer Dimension ( = Weight Width )
 Bias_zero = true
-Softmax = true
+Softmax = false
diff --git a/src/layers.cpp b/src/layers.cpp
index 273517d..424b95c 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -94,9 +94,10 @@ float ReluPrime(float x) {
   }
 }
 
-static void WeightInitialization(Tensor W, unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
+static Tensor WeightInitialization(unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
   std::random_device rd;
   std::mt19937 gen(rd());
+  Tensor W = Tensor(height, width);
 
   switch (init_type) {
   case Layers::WEIGHT_LECUN_NORMAL: {
@@ -148,9 +149,12 @@ static void WeightInitialization(Tensor W, unsigned int width, unsigned int heig
     }
   } break;
   default:
+    W.setZero();
     break;
   }
+  return W;
 }
+
 namespace Layers {
 
 void InputLayer::setOptimizer(Optimizer opt) {
@@ -206,10 +210,8 @@ void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero
   this->init_zero = init_zero;
   this->bnfallow = false;
 
-  Weight = Tensor(h, w);
   Bias = Tensor(1, w);
-
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -286,7 +288,7 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   Tensor ret = dJdB.dot(Weight.transpose());
@@ -329,13 +331,12 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero, weight
   this->height = h;
   this->index = id;
   this->init_zero = init_zero;
-  Weight = Tensor(h, w);
+  Bias = Tensor(1, w);
 
   this->cost = cost;
   this->bnfallow = false;
 
-  // Weight = Weight.applyFunction(random);
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -528,7 +529,7 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   ret = dJdB.dot(Weight.transpose());
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index 5c1a8d5..8a79799 100644
--- a/src/neuralnet.cpp
+++ b/src/neuralnet.cpp
@@ -59,14 +59,14 @@ namespace Network {
  *            "sgd"  : Stochestic Gradient Descent
  *            "adam" : Adaptive Moment Estimation
  */
-std::vector<std::string> Optimizer_string = {"sgd", "adam"};
+std::vector<std::string> Optimizer_string = {"sgd", "adam", "unkown"};
 
 /**
  * @brief     Cost Function String from configure file
  *            "msr"  : Mean Squared Roots
  *            "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross", "unkown"};
 
 /**
  * @brief     Network Type String from configure file
@@ -74,7 +74,7 @@ std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
  *            "regression" : Logistic Regression
  *            "neuralnet"  : Neural Network
  */
-std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"};
+std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet", "unkown"};
 
 /**
  * @brief     Activation Type String from configure file
@@ -82,7 +82,7 @@ std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"}
  *            "sigmoid" : sigmoid
  *            "relu"    : relu
  */
-std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
+std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu", "unkown"};
 
 /**
  * @brief     Layer Type String from configure file
@@ -90,7 +90,7 @@ std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
  *            "FullyConnectedLayer" : Fully Connected Layer Object
  *            "OutputLayer" : Output Layer Object
  */
-std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer"};
+std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer", "Unkown"};
 
 /**
  * @brief     Weight Initialization Type String from configure file
@@ -101,14 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform" : He Uniform Initialization
  */
-std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+ std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform", "unkown"};
 
 /**
  * @brief     Weight Decay String from configure file
  *            "L2Norm"     : squared norm regularization
  *            "Regression" : Regression
  */
-std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
+std::vector<std::string> weight_decay_string = {"l2norm", "regression", "unkown"};
 
 /**
  * @brief     Check Existance of File
@@ -245,10 +245,11 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
-  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", "Unknown"), TOKEN_WEIGHT_DECAY);
 
+  opt.weight_decay.lambda = 0.0;
   if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
-    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:Weight_Decay_Lambda", 0.0);
   }
 
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
--
2.7.4
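
With the parsing above, the corrected decay is configured from the [Network]
section of the .ini file. A minimal sketch of the relevant keys, with values
taken from the example configuration in this patch (iniparser key lookup is
expected to be case-insensitive, which is why the example's weight_Decay_Lambda
spelling still resolves to Network:Weight_Decay_Lambda):

    [Network]
    Weight_Decay = l2norm          # must match an entry of weight_decay_string
    Weight_Decay_Lambda = 0.005    # read only for l2norm; lambda stays 0.0 otherwise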