Fix Weight Decay L2Norm
author jijoong.moon <jijoong.moon@samsung.com>
Tue, 17 Mar 2020 00:40:34 +0000 (09:40 +0900)
committer Jijoong Moon / On-Device Lab (SR) / Principal Engineer / Samsung Electronics <jijoong.moon@samsung.com>
Wed, 18 Mar 2020 00:33:39 +0000 (09:33 +0900)
The L2-norm weight decay term was applied with the wrong sign: for a regularized
loss J + (lambda/2) * ||W||^2, the weight gradient should gain +lambda * W, but the
backward pass subtracted it. This PR fixes the sign, makes WeightInitialization
return the initialized Tensor, and tidies the weight-decay configuration parsing.
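
A minimal sketch of the corrected gradient term (plain C++ with a hypothetical
applyL2Decay helper; the project applies the same idea through its Tensor class):

  #include <cstddef>
  #include <vector>

  // Hypothetical helper showing the corrected sign: the decay term
  // lambda * W is added to dJ/dW, matching the new dJdW.add(...) call.
  std::vector<float> applyL2Decay(const std::vector<float> &dJdW,
                                  const std::vector<float> &W, float lambda) {
    std::vector<float> out(dJdW.size());
    for (std::size_t i = 0; i < dJdW.size(); ++i)
      out[i] = dJdW[i] + lambda * W[i]; // previously this term was subtracted
    return out;
  }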

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
Applications/Classification/res/Classification.ini
src/layers.cpp
src/neuralnet.cpp

Applications/Classification/res/Classification.ini
index 7998ef0..f2c96e2 100644
@@ -2,42 +2,35 @@
 [Network]
 Type = NeuralNetwork   # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
-         fc1layer \
         outputlayer    #Layers of Neuralnetwork
-Learning_rate = 0.001  # Learning Rate
+Learning_rate = 0.0001         # Learning Rate
 Decay_rate = 0.96      # for the decay_rate for the decayed learning rate
 Decay_steps = 1000       # decay step for the exponential decayed learning rate
 Epoch = 30000          # Epoch 
-Optimizer = sgd        # Optimizer : sgd (stochastic gradien decent),
+Optimizer = adam       # Optimizer : sgd (stochastic gradien decent),
                        #             adam (Adamtive Moment Estimation)
 Activation = sigmoid   # activation : sigmoid, tanh
-Cost = cross                   # Cost(loss) function : msr (mean square root error)
+Cost = cross           # Cost(loss) function : msr (mean square root error)
+Weight_Decay = l2norm
+weight_Decay_Lambda = 0.005
                         #                       categorical ( for logistic regression )
 Model = "model.bin"    # model path to save / read
 minibatch = 32         # mini batch size
 beta1 = 0.9            # beta 1 for adam
 beta2 = 0.9999 # beta 2 for adam
-epsilon = 1e-8 # epsilon for adam
+epsilon = 1e-7 # epsilon for adam
 
 # Layer Section : Name
 [inputlayer]
 Type = InputLayer
 Id = 0                 # Layer Id
-Height = 1             
-Width = 62720          # Input Layer Dimension
+HiddenSize = 62720             # Input Layer Dimension
 Bias_zero = true       # Zero Bias
-
-[fc1layer]
-Type = FullyConnectedLayer
-Id = 1
-Height = 62720         # Input Dimension ( = Weight Height )
-Width = 128            # Hidden Layer Dimension ( = Weight Width )
-Bias_zero = true
+Normalization = true
 
 [outputlayer]
 Type = OutputLayer
 Id = 2
-Height = 128           # Hidden Layer Dimension ( = Weight Height )
-Width = 10             # Output Layer Dimension ( = Weight Width )
+HiddenSize = 10                # Output Layer Dimension ( = Weight Width )
 Bias_zero = true
-Softmax = true
+Softmax = false
src/layers.cpp
index 273517d..424b95c 100644
@@ -94,9 +94,10 @@ float ReluPrime(float x) {
   }
 }
 
-static void WeightInitialization(Tensor W, unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
+static Tensor WeightInitialization(unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
   std::random_device rd;
   std::mt19937 gen(rd());
+  Tensor W = Tensor(height, width);
 
   switch (init_type) {
     case Layers::WEIGHT_LECUN_NORMAL: {
@@ -148,9 +149,12 @@ static void WeightInitialization(Tensor W, unsigned int width, unsigned int heig
         }
     } break;
     default:
+      W.setZero();
       break;
   }
+  return W;
 }
+
 namespace Layers {
 
 void InputLayer::setOptimizer(Optimizer opt) {
@@ -206,10 +210,8 @@ void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero
   this->init_zero = init_zero;
   this->bnfallow = false;
 
-  Weight = Tensor(h, w);
   Bias = Tensor(1, w);
-
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -286,7 +288,7 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   Tensor ret = dJdB.dot(Weight.transpose());
@@ -329,13 +331,12 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero, weight
   this->height = h;
   this->index = id;
   this->init_zero = init_zero;
-  Weight = Tensor(h, w);
+
   Bias = Tensor(1, w);
   this->cost = cost;
   this->bnfallow = false;
 
-  // Weight = Weight.applyFunction(random);
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -528,7 +529,7 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   ret = dJdB.dot(Weight.transpose());
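
Note on the layers.cpp refactor above: the old WeightInitialization took its Tensor
argument by value, so (assuming Tensor owns its storage rather than sharing it) the
initialized values were written into a copy the caller never saw. Returning the
Tensor instead guarantees the result reaches the caller. A minimal sketch of the
pattern, with a hypothetical Matrix alias standing in for Tensor:

  #include <cstddef>
  #include <vector>

  using Matrix = std::vector<float>;   // stand-in for the project's Tensor

  // Before the change, initialization wrote into a by-value copy:
  //   void initWeights(Matrix m, ...) { ... }  // caller's matrix untouched
  // Building and returning the initialized object avoids that pitfall.
  Matrix initWeights(std::size_t n, float value) {
    Matrix m(n, 0.0f);
    for (auto &v : m)
      v = value;                        // e.g. a draw from the chosen initializer
    return m;
  }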
src/neuralnet.cpp
index 5c1a8d5..8a79799 100644
@@ -59,14 +59,14 @@ namespace Network {
  *            "sgd"  : Stochestic Gradient Descent
  *            "adam" : Adaptive Moment Estimation
  */
-std::vector<std::string> Optimizer_string = {"sgd", "adam"};
+std::vector<std::string> Optimizer_string = {"sgd", "adam", "unkown"};
 
 /**
  * @brief     Cost Function String from configure file
  *            "msr"  : Mean Squared Roots
  *            "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross", "unkown"};
 
 /**
  * @brief     Network Type String from configure file
@@ -74,7 +74,7 @@ std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
  *            "regression" : Logistic Regression
  *            "neuralnet" : Neural Network
  */
-std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"};
+std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet", "unkown"};
 
 /**
  * @brief     Activation Type String from configure file
@@ -82,7 +82,7 @@ std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"}
  *            "sigmoid" : sigmoid
  *            "relu" : relu
  */
-std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
+std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu", "unkown"};
 
 /**
  * @brief     Layer Type String from configure file
@@ -90,7 +90,7 @@ std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
  *            "FullyConnectedLayer" : Fully Connected Layer Object
  *            "OutputLayer" : Output Layer Object
  */
-std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer"};
+std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer", "Unkown"};
 
 /**
  * @brief     Weight Initialization Type String from configure file
@@ -101,14 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform"  : He Uniform Initialization
  */
-std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+  std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform", "unkown"};
 
 /**
  * @brief     Weight Decay String from configure file
  *            "L2Norm"  : squared norm regularization
  *            "Regression" : Regression
  */
-std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
+std::vector<std::string> weight_decay_string = {"l2norm", "regression", "unkown"};
 
 /**
  * @brief     Check Existance of File
@@ -245,10 +245,11 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
-  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", "Unknown"), TOKEN_WEIGHT_DECAY);
 
+  opt.weight_decay.lambda = 0.0;
   if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
-    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:Weight_Decay_Lambda", 0.0);
   }
 
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
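
The neuralnet.cpp hunks append an "unkown" entry to each keyword list, give
Weight_Decay an explicit "Unknown" default instead of passing NULL into the parser,
and initialize opt.weight_decay.lambda to 0.0 so non-L2 configurations never carry
an undefined value. A minimal sketch of that sentinel lookup pattern (hypothetical
parseToken helper, not the project's parseType signature):

  #include <string>
  #include <vector>

  // Return the index of `value` in `tokens`; the last entry is an
  // "unknown" sentinel, so unmatched or missing values map to it.
  unsigned int parseToken(const std::string &value,
                          const std::vector<std::string> &tokens) {
    for (unsigned int i = 0; i + 1 < tokens.size(); ++i)
      if (value == tokens[i])
        return i;
    return static_cast<unsigned int>(tokens.size()) - 1; // the sentinel
  }

  // e.g. parseToken("l2norm", {"l2norm", "regression", "unkown"}) == 0
  //      parseToken("Unknown", {"l2norm", "regression", "unkown"}) == 2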