Add Weight Decay (L2Norm)
author jijoong.moon <jijoong.moon@samsung.com>
Mon, 16 Mar 2020 04:50:12 +0000 (13:50 +0900)
committer Jijoong Moon / On-Device Lab (SR) / Principal Engineer / Samsung Electronics <jijoong.moon@samsung.com>
Mon, 16 Mar 2020 07:21:24 +0000 (16:21 +0900)
Implement Weight Decay (L2Norm only)
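
Weight decay is enabled from the network configuration file: when `Network:Weight_Decay` is `L2Norm`, `Network:weight_decay_lambda` is read as well, the term `lambda * 0.5 * Weight.l2norm()` is added to the reported loss, and `Weight.multiply(lambda)` is subtracted from `dJdW` during backwarding. A minimal sketch of the relevant ini entries (key names follow the parser added in neuralnet.cpp; the lambda value is only an example):

```
[Network]
Weight_Decay = L2Norm
weight_decay_lambda = 0.005
```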

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
include/layers.h
include/neuralnet.h
include/tensor.h
src/layers.cpp
src/neuralnet.cpp
src/tensor.cpp

diff --git a/include/layers.h b/include/layers.h
index 3c5c0de..cb57624 100644
@@ -60,6 +60,14 @@ typedef enum { COST_CATEGORICAL, COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_typ
 typedef enum { ACT_TANH, ACT_SIGMOID, ACT_RELU, ACT_UNKNOWN } acti_type;
 
 /**
+ * @brief     Enumeration of Weight Decay type
+ *            0. L2Norm
+ *            1. Regression
+ *            2. Unknown
+ */
+typedef enum { WEIGHT_DECAY_L2NORM, WEIGHT_DECAY_REGRESSION, WEIGHT_DECAY_UNKNOWN } weight_decay_type;
+
+/**
  * @brief     Enumeration of layer type
  *            0. Input Layer type
  *            1. Fully Connected Layer type
@@ -91,6 +99,14 @@ typedef enum {
 * @brief     type for the Optimizer to save hyper-parameter
  */
 typedef struct {
+  weight_decay_type type;
+  float lambda;
+} Weight_Decay_param;
+
+/**
+ * @brief     type for the Optimizer to save hyper-parameter
+ */
+typedef struct {
   opt_type type;
   float learning_rate;
   double beta1;
@@ -99,6 +115,7 @@ typedef struct {
   acti_type activation;
   float decay_rate;
   float decay_steps;
+  Weight_Decay_param weight_decay;
 } Optimizer;
 
 /**
diff --git a/include/neuralnet.h b/include/neuralnet.h
index fa226dc..f0dcf19 100644
@@ -52,10 +52,19 @@ typedef enum { NET_KNN, NET_REG, NET_NEU, NET_UNKNOWN } net_type;
  *            3. ACTI    ( Activation Token )
  *            4. LAYER   ( Layer Token )
  *            5. WEIGHTINI  ( Weight Initialization Token )
- *            6. UNKNOWN
+ *            6. WEIGHT_DECAY  ( Weight Decay Token )
+ *            7. UNKNOWN
  */
-typedef enum { TOKEN_OPT, TOKEN_COST, TOKEN_NET, TOKEN_ACTI, TOKEN_LAYER, TOKEN_WEIGHTINI, TOKEN_UNKNOWN } input_type;
-
+typedef enum {
+  TOKEN_OPT,
+  TOKEN_COST,
+  TOKEN_NET,
+  TOKEN_ACTI,
+  TOKEN_LAYER,
+  TOKEN_WEIGHTINI,
+  TOKEN_WEIGHT_DECAY,
+  TOKEN_UNKNOWN
+} input_type;
 
 /**
  * @class   NeuralNetwork Class
diff --git a/include/tensor.h b/include/tensor.h
index e98c35f..61a610a 100644
@@ -176,6 +176,12 @@ class Tensor {
   Tensor softmax() const;
 
   /**
+   * @brief     l2norm the Tensor elements
+   * @retval    Calculated l2norm
+   */
+  float l2norm() const;
+
+  /**
    * @brief     Normalize the Tensor elements
    * @retval    Calculated Tensor
    */
diff --git a/src/layers.cpp b/src/layers.cpp
index 3a4e148..273517d 100644
@@ -284,6 +284,11 @@ void FullyConnectedLayer::copy(Layer *l) {
 Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdB = derivative.multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
   Tensor dJdW = Input.transpose().dot(dJdB);
+
+  if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+  }
+
   Tensor ret = dJdB.dot(Weight.transpose());
 
   float ll = opt.learning_rate;
@@ -469,6 +474,10 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
                          .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
                              Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
       loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
+
     } break;
     case COST_MSR: {
       Tensor sub = Y2.subtract(Y);
@@ -479,6 +488,9 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
       }
 
       loss = lossSum / (float)l.getBatch();
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
 
       dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
     } break;
@@ -502,6 +514,11 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
         lossSum += t[i];
       }
       loss = lossSum / (float)l.getBatch();
+
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
+
     } break;
     case COST_UNKNOWN:
     default:
@@ -509,6 +526,11 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   }
 
   Tensor dJdW = Input.transpose().dot(dJdB);
+
+  if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+  }
+
   ret = dJdB.dot(Weight.transpose());
 
   switch (opt.type) {
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index 9e21407..5c1a8d5 100644
@@ -101,7 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform"  : He Uniform Initialization
  */
-  std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+
+/**
+ * @brief     Weight Decay String from configure file
+ *            "L2Norm"  : squared norm regularization
+ *            "Regression" : Regression
+ */
+std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
 
 /**
  * @brief     Check Existance of File
@@ -190,6 +197,14 @@ unsigned int parseType(std::string ll, input_type t) {
       }
       ret = i - 1;
       break;
+    case TOKEN_WEIGHT_DECAY:
+      for (i = 0; i < weight_decay_string.size(); i++) {
+        if (caseInSensitiveCompare(weight_decay_string[i], ll)) {
+          return (i);
+        }
+      }
+      ret = i - 1;
+      break;
     case TOKEN_UNKNOWN:
     default:
       ret = 3;
@@ -230,6 +245,12 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+
+  if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+  }
+
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
   batchsize = iniparser_getint(ini, "Network:minibatch", 1);
 
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 79ac2e1..136df9d 100644
@@ -563,6 +563,15 @@ int Tensor::argmax() {
   return index;
 }
 
+float Tensor::l2norm() const {
+  float sum = 0.0;
+  for(int i=0;i<len;i++){
+    sum += this->data[i] * this->data[i];
+  }
+
+  return sqrt(sum);
+}
+
 Tensor Tensor::normalization() const {
   Tensor results(batch, height, width);
   float Min = 1000000.0;
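
For reference, a minimal standalone sketch (not part of the patch) of the arithmetic this commit applies, with plain `std::vector<float>` buffers standing in for the `Tensor` class; the lambda, weight, and gradient values are made up for illustration:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// ||W|| as computed by Tensor::l2norm(): sqrt of the sum of squared elements.
static float l2norm(const std::vector<float> &w) {
  float sum = 0.0f;
  for (float v : w)
    sum += v * v;
  return std::sqrt(sum);
}

// Mirrors dJdW = dJdW.subtract(Weight.multiply(lambda)) in backwarding().
static void applyWeightDecay(std::vector<float> &dJdW,
                             const std::vector<float> &W, float lambda) {
  for (size_t i = 0; i < W.size(); ++i)
    dJdW[i] -= lambda * W[i];
}

int main() {
  std::vector<float> W = {0.5f, -1.0f, 2.0f};     // example weights
  std::vector<float> dJdW = {0.1f, 0.2f, -0.3f};  // example weight gradient
  const float lambda = 0.005f;                    // example decay lambda

  float loss = 1.234f;                // loss from the cost function
  loss += lambda * 0.5f * l2norm(W);  // term added to the reported loss
  applyWeightDecay(dJdW, W, lambda);  // gradient adjustment before the update

  std::printf("loss=%f dJdW[0]=%f\n", loss, dJdW[0]);
  return 0;
}
```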