Fix Weight Decay L2Norm
author jijoong.moon <jijoong.moon@samsung.com>
Tue, 17 Mar 2020 00:40:34 +0000 (09:40 +0900)
committer Jijoong Moon / On-Device Lab (SR) / Principal Engineer / Samsung Electronics <jijoong.moon@samsung.com>
Wed, 18 Mar 2020 00:33:39 +0000 (09:33 +0900)
The L2-norm weight decay term was applied with the wrong sign: for a regularized
loss J + (lambda/2) * ||W||^2, the weight gradient should gain +lambda * W, but the
backward pass subtracted it. This PR fixes the sign, makes WeightInitialization
return the initialized Tensor, and tidies the weight-decay configuration parsing.
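
A minimal sketch of the corrected gradient term (plain C++ with a hypothetical
applyL2Decay helper; the project applies the same idea through its Tensor class):

  #include <cstddef>
  #include <vector>

  // Hypothetical helper showing the corrected sign: the decay term
  // lambda * W is added to dJ/dW, matching the new dJdW.add(...) call.
  std::vector<float> applyL2Decay(const std::vector<float> &dJdW,
                                  const std::vector<float> &W, float lambda) {
    std::vector<float> out(dJdW.size());
    for (std::size_t i = 0; i < dJdW.size(); ++i)
      out[i] = dJdW[i] + lambda * W[i]; // previously this term was subtracted
    return out;
  }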

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
Applications/Classification/res/Classification.ini
src/layers.cpp
src/neuralnet.cpp

Applications/Classification/res/Classification.ini
index 7998ef0..f2c96e2 100644
@@ -2,42 +2,35 @@
 [Network]
 Type = NeuralNetwork   # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
-         fc1layer \
         outputlayer    #Layers of Neuralnetwork
-Learning_rate = 0.001  # Learning Rate
+Learning_rate = 0.0001         # Learning Rate
 Decay_rate = 0.96      # for the decay_rate for the decayed learning rate
 Decay_steps = 1000       # decay step for the exponential decayed learning rate
 Epoch = 30000          # Epoch 
-Optimizer = sgd        # Optimizer : sgd (stochastic gradien decent),
+Optimizer = adam       # Optimizer : sgd (stochastic gradien decent),
                        #             adam (Adamtive Moment Estimation)
 Activation = sigmoid   # activation : sigmoid, tanh
-Cost = cross                   # Cost(loss) function : msr (mean square root error)
+Cost = cross           # Cost(loss) function : msr (mean square root error)
+Weight_Decay = l2norm
+weight_Decay_Lambda = 0.005
                         #                       categorical ( for logistic regression )
 Model = "model.bin"    # model path to save / read
 minibatch = 32         # mini batch size
 beta1 = 0.9            # beta 1 for adam
 beta2 = 0.9999 # beta 2 for adam
-epsilon = 1e-8 # epsilon for adam
+epsilon = 1e-7 # epsilon for adam
 
 # Layer Section : Name
 [inputlayer]
 Type = InputLayer
 Id = 0                 # Layer Id
-Height = 1             
-Width = 62720          # Input Layer Dimension
+HiddenSize = 62720             # Input Layer Dimension
 Bias_zero = true       # Zero Bias
-
-[fc1layer]
-Type = FullyConnectedLayer
-Id = 1
-Height = 62720         # Input Dimension ( = Weight Height )
-Width = 128            # Hidden Layer Dimension ( = Weight Width )
-Bias_zero = true
+Normalization = true
 
 [outputlayer]
 Type = OutputLayer
 Id = 2
-Height = 128           # Hidden Layer Dimension ( = Weight Height )
-Width = 10             # Output Layer Dimension ( = Weight Width )
+HiddenSize = 10                # Output Layer Dimension ( = Weight Width )
 Bias_zero = true
-Softmax = true
+Softmax = false
src/layers.cpp
index 273517d..424b95c 100644
@@ -94,9 +94,10 @@ float ReluPrime(float x) {
   }
 }
 
-static void WeightInitialization(Tensor W, unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
+static Tensor WeightInitialization(unsigned int width, unsigned int height, Layers::weightIni_type init_type) {
   std::random_device rd;
   std::mt19937 gen(rd());
+  Tensor W = Tensor(height, width);
 
   switch (init_type) {
     case Layers::WEIGHT_LECUN_NORMAL: {
@@ -148,9 +149,12 @@ static void WeightInitialization(Tensor W, unsigned int width, unsigned int heig
         }
     } break;
     default:
+      W.setZero();
       break;
   }
+  return W;
 }
+
 namespace Layers {
 
 void InputLayer::setOptimizer(Optimizer opt) {
@@ -206,10 +210,8 @@ void FullyConnectedLayer::initialize(int b, int h, int w, int id, bool init_zero
   this->init_zero = init_zero;
   this->bnfallow = false;
 
-  Weight = Tensor(h, w);
   Bias = Tensor(1, w);
-
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -286,7 +288,7 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   Tensor ret = dJdB.dot(Weight.transpose());
@@ -329,13 +331,12 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero, weight
   this->height = h;
   this->index = id;
   this->init_zero = init_zero;
-  Weight = Tensor(h, w);
+
   Bias = Tensor(1, w);
   this->cost = cost;
   this->bnfallow = false;
 
-  // Weight = Weight.applyFunction(random);
-  WeightInitialization(Weight, w, h, wini);
+  Weight = WeightInitialization(w, h, wini);
 
   if (init_zero) {
     Bias.setZero();
@@ -528,7 +529,7 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   Tensor dJdW = Input.transpose().dot(dJdB);
 
   if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
-    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+    dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
   }
 
   ret = dJdB.dot(Weight.transpose());
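
Note on the layers.cpp refactor above: the old WeightInitialization took its Tensor
argument by value, so (assuming Tensor owns its storage rather than sharing it) the
initialized values were written into a copy the caller never saw. Returning the
Tensor instead guarantees the result reaches the caller. A minimal sketch of the
pattern, with a hypothetical Matrix alias standing in for Tensor:

  #include <cstddef>
  #include <vector>

  using Matrix = std::vector<float>;   // stand-in for the project's Tensor

  // Before the change, initialization wrote into a by-value copy:
  //   void initWeights(Matrix m, ...) { ... }  // caller's matrix untouched
  // Building and returning the initialized object avoids that pitfall.
  Matrix initWeights(std::size_t n, float value) {
    Matrix m(n, 0.0f);
    for (auto &v : m)
      v = value;                        // e.g. a draw from the chosen initializer
    return m;
  }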
src/neuralnet.cpp
index 5c1a8d5..8a79799 100644
@@ -59,14 +59,14 @@ namespace Network {
  *            "sgd"  : Stochestic Gradient Descent
  *            "adam" : Adaptive Moment Estimation
  */
-std::vector<std::string> Optimizer_string = {"sgd", "adam"};
+std::vector<std::string> Optimizer_string = {"sgd", "adam", "unkown"};
 
 /**
  * @brief     Cost Function String from configure file
  *            "msr"  : Mean Squared Roots
  *            "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross", "unkown"};
 
 /**
  * @brief     Network Type String from configure file
@@ -74,7 +74,7 @@ std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
  *            "regression" : Logistic Regression
  *            "neuralnet" : Neural Network
  */
-std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"};
+std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet", "unkown"};
 
 /**
  * @brief     Activation Type String from configure file
@@ -82,7 +82,7 @@ std::vector<std::string> NetworkType_string = {"knn", "regression", "neuralnet"}
  *            "sigmoid" : sigmoid
  *            "relu" : relu
  */
-std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
+std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu", "unkown"};
 
 /**
  * @brief     Layer Type String from configure file
@@ -90,7 +90,7 @@ std::vector<std::string> activation_string = {"tanh", "sigmoid", "relu"};
  *            "FullyConnectedLayer" : Fully Connected Layer Object
  *            "OutputLayer" : Output Layer Object
  */
-std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer"};
+std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "OutputLayer", "BatchNormalizationLayer", "Unkown"};
 
 /**
  * @brief     Weight Initialization Type String from configure file
@@ -101,14 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform"  : He Uniform Initialization
  */
-std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+  std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform", "unkown"};
 
 /**
  * @brief     Weight Decay String from configure file
  *            "L2Norm"  : squared norm regularization
  *            "Regression" : Regression
  */
-std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
+std::vector<std::string> weight_decay_string = {"l2norm", "regression", "unkown"};
 
 /**
  * @brief     Check Existance of File
@@ -245,10 +245,11 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
-  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", "Unknown"), TOKEN_WEIGHT_DECAY);
 
+  opt.weight_decay.lambda = 0.0;
   if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
-    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:Weight_Decay_Lambda", 0.0);
   }
 
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
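
The neuralnet.cpp hunks append an "unkown" entry to each keyword list, give
Weight_Decay an explicit "Unknown" default instead of passing NULL into the parser,
and initialize opt.weight_decay.lambda to 0.0 so non-L2 configurations never carry
an undefined value. A minimal sketch of that sentinel lookup pattern (hypothetical
parseToken helper, not the project's parseType signature):

  #include <string>
  #include <vector>

  // Return the index of `value` in `tokens`; the last entry is an
  // "unknown" sentinel, so unmatched or missing values map to it.
  unsigned int parseToken(const std::string &value,
                          const std::vector<std::string> &tokens) {
    for (unsigned int i = 0; i + 1 < tokens.size(); ++i)
      if (value == tokens[i])
        return i;
    return static_cast<unsigned int>(tokens.size()) - 1; // the sentinel
  }

  // e.g. parseToken("l2norm", {"l2norm", "regression", "unkown"}) == 0
  //      parseToken("Unknown", {"l2norm", "regression", "unkown"}) == 2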