From aae17359d166eaf43eabffb3b27ab4418891d815 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Tue, 17 Mar 2020 09:40:34 +0900
Subject: [PATCH] Fix Weight Decay L2Norm

The L2-norm weight decay was implemented with the wrong sign: the decay term
was subtracted from the weight gradient instead of added. This patch fixes the
gradient computation in FullyConnectedLayer and OutputLayer, makes the
weight-decay configuration parsing consistent, refactors WeightInitialization
to construct and return the weight tensor, and updates the Classification
example configuration accordingly.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
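
Note: the L2 penalty adds (lambda/2) * ||W||^2 to the cost, so its gradient with
respect to W is +lambda * W, and that term must be added to dJdW; the previous
code subtracted it, pushing weights away from zero instead of shrinking them.
A minimal sketch of the corrected step, using only Tensor calls that appear in
this patch (add, multiply, subtract) and a hypothetical learning rate alpha
shown for illustration only:

    // dL/dW for L = J(W) + (lambda/2) * ||W||^2
    Tensor dJdW_reg = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
    // plain gradient-descent step; the real update goes through the configured
    // optimizer (sgd or adam), this only illustrates the sign convention
    Weight = Weight.subtract(dJdW_reg.multiply(alpha));
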
 Applications/Classification/res/Classification.ini | 27 ++++++++--------------
 src/layers.cpp                                      | 19 +++++++--------
 src/neuralnet.cpp                                   | 19 +++++++--------
 3 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/Applications/Classification/res/Classification.ini b/Applications/Classification/res/Classification.ini
index 7998ef0..f2c96e2 100644
--- a/Applications/Classification/res/Classification.ini
+++ b/Applications/Classification/res/Classification.ini
@@ -2,42 +2,35 @@
 [Network]
 Type = NeuralNetwork        # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
-         fc1layer \
          outputlayer        #Layers of Neuralnetwork
-Learning_rate = 0.001       # Learning Rate
+Learning_rate = 0.0001      # Learning Rate
 Decay_rate = 0.96           # for the decay_rate for the decayed learning rate
 Decay_steps = 1000          # decay step for the exponential decayed learning rate
 Epoch = 30000               # Epoch
-Optimizer = sgd             # Optimizer : sgd (stochastic gradien decent),
+Optimizer = adam            # Optimizer : sgd (stochastic gradien decent),
                             #             adam (Adamtive Moment Estimation)
 Activation = sigmoid        # activation : sigmoid, tanh
-Cost = cross                # Cost(loss) function : msr (mean square root error)
+Cost = cross                # Cost(loss) function : msr (mean square root error)
+Weight_Decay = l2norm
+weight_Decay_Lambda = 0.005
                             #                       categorical ( for logistic regression )
 Model = "model.bin"         # model path to save / read
 minibatch = 32              # mini batch size
 beta1 = 0.9                 # beta 1 for adam
 beta2 = 0.9999              # beta 2 for adam
-epsilon = 1e-8              # epsilon for adam
+epsilon = 1e-7              # epsilon for adam
 
 # Layer Section : Name
 [inputlayer]
 Type = InputLayer
 Id = 0                      # Layer Id
-Height = 1
-Width = 62720               # Input Layer Dimension
+HiddenSize = 62720          # Input Layer Dimension
 Bias_zero = true            # Zero Bias
-
-[fc1layer]
-Type = FullyConnectedLayer
-Id = 1
-Height = 62720              # Input Dimension ( = Weight Height )
-Width = 128                 # Hidden Layer Dimension ( = Weight Width )
-Bias_zero = true
+Normalization = true
 
 [outputlayer]
 Type = OutputLayer
 Id = 2
-Height = 128                # Hidden Layer Dimension ( = Weight Height )
-Width = 10                  # Output Layer Dimension ( = Weight Width )
+HiddenSize = 10             # Output Layer Dimension ( = Weight Width )
 Bias_zero = true
-Softmax = true
+Softmax = false
diff --git a/src/layers.cpp b/src/layers.cpp
index 273517d..424b95c 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -94,9 +94,10 @@ float ReluPrime(float x) {
   }
 }
 
-static void WeightInitialization(Tensor W, unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
+static Tensor WeightInitialization(unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
   std::random_device rd;
   std::mt19937 gen(rd());
+  Tensor W = Tensor(height, width);
 
   switch (init_type) {
   case Layers::WEIGHT_LECUN_NORMAL: {
@@ -148,9 +149,12 @@ static void WeightInitialization(Tensor W, unsigned int width, unsigned int heig
     }
   } break;
   default:
+    W.setZero();
     break;
   }
+  return W;
 }
+
 namespace Layers {
 
 void InputLayer::setOptimizer(Optimizer opt) {
@@ -206,10 +210,8 @@ void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero
   this->init_zero = init_zero;
   this->bnfallow = false;
 
-  Weight = Tensor(h, w);
   Bias = Tensor(1, w);
-
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -286,7 +288,7 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   Tensor ret = dJdB.dot(Weight.transpose());
@@ -329,13 +331,12 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero, weight
   this->height = h;
   this->index = id;
   this->init_zero = init_zero;
-  Weight = Tensor(h, w);
+  Bias = Tensor(1, w);
 
   this->cost = cost;
   this->bnfallow = false;
 
-  // Weight = Weight.applyFunction(random);
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -528,7 +529,7 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   ret = dJdB.dot(Weight.transpose());
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index 5c1a8d5..8a79799 100644
--- a/src/neuralnet.cpp
+++ b/src/neuralnet.cpp
@@ -59,14 +59,14 @@ namespace Network {
  *            "sgd"  : Stochestic Gradient Descent
  *            "adam" : Adaptive Moment Estimation
  */
-std::vector<std::string> Optimizer_string = {"sgd", "adam"};
+std::vector<std::string> Optimizer_string = {"sgd", "adam", "unkown"};
 
 /**
  * @brief     Cost Function String from configure file
  *            "msr"  : Mean Squared Roots
  *            "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross", "unkown"};
 
 /**
  * @brief     Network Type String from configure file
@@ -74,7 +74,7 @@ std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
  *            "regression" : Logistic Regression
  *            "neuralnet"  : Neural Network
  */
-std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"};
+std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet", "unkown"};
 
 /**
  * @brief     Activation Type String from configure file
@@ -82,7 +82,7 @@ std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"}
  *            "sigmoid" : sigmoid
  *            "relu"    : relu
  */
-std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
+std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu", "unkown"};
 
 /**
  * @brief     Layer Type String from configure file
@@ -90,7 +90,7 @@ std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
  *            "FullyConnectedLayer" : Fully Connected Layer Object
  *            "OutputLayer" : Output Layer Object
  */
-std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer"};
+std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer", "Unkown"};
 
 /**
  * @brief     Weight Initialization Type String from configure file
@@ -101,14 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform" : He Uniform Initialization
  */
-std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+ std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform", "unkown"};
 
 /**
  * @brief     Weight Decay String from configure file
  *            "L2Norm"     : squared norm regularization
  *            "Regression" : Regression
  */
-std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
+std::vector<std::string> weight_decay_string = {"l2norm", "regression", "unkown"};
 
 /**
  * @brief     Check Existance of File
@@ -245,10 +245,11 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
-  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", "Unknown"), TOKEN_WEIGHT_DECAY);
 
+  opt.weight_decay.lambda = 0.0;
   if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
-    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:Weight_Decay_Lambda", 0.0);
   }
 
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
--
2.7.4
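
With the parsing above, the corrected decay is configured from the [Network]
section of the .ini file. A minimal sketch of the relevant keys, with values
taken from the example configuration in this patch (iniparser key lookup is
expected to be case-insensitive, which is why the example's weight_Decay_Lambda
spelling still resolves to Network:Weight_Decay_Lambda):

    [Network]
    Weight_Decay = l2norm          # must match an entry of weight_decay_string
    Weight_Decay_Lambda = 0.005    # read only for l2norm; lambda stays 0.0 otherwise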