Add Weight Decay (L2Norm)
author jijoong.moon <jijoong.moon@samsung.com>
Mon, 16 Mar 2020 04:50:12 +0000 (13:50 +0900)
committer Jijoong Moon / On-Device Lab (SR) / Principal Engineer / Samsung Electronics <jijoong.moon@samsung.com>
Mon, 16 Mar 2020 07:21:24 +0000 (16:21 +0900)
Implement Weight Decay (L2Norm only)
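
Weight decay is enabled from the network configuration file: when `Network:Weight_Decay` is `L2Norm`, `Network:weight_decay_lambda` is read as well, the term `lambda * 0.5 * Weight.l2norm()` is added to the reported loss, and `Weight.multiply(lambda)` is subtracted from `dJdW` during backwarding. A minimal sketch of the relevant ini entries (key names follow the parser added in neuralnet.cpp; the lambda value is only an example):

```
[Network]
Weight_Decay = L2Norm
weight_decay_lambda = 0.005
```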

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
include/layers.h
include/neuralnet.h
include/tensor.h
src/layers.cpp
src/neuralnet.cpp
src/tensor.cpp

diff --git a/include/layers.h b/include/layers.h
index 3c5c0de..cb57624 100644
@@ -60,6 +60,14 @@ typedef enum { COST_CATEGORICAL, COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_typ
 typedef enum { ACT_TANH, ACT_SIGMOID, ACT_RELU, ACT_UNKNOWN } acti_type;
 
 /**
+ * @brief     Enumeration of Weight Decay type
+ *            0. L2Norm
+ *            1. Regression
+ *            2. Unknown
+ */
+typedef enum { WEIGHT_DECAY_L2NORM, WEIGHT_DECAY_REGRESSION, WEIGHT_DECAY_UNKNOWN } weight_decay_type;
+
+/**
  * @brief     Enumeration of layer type
  *            0. Input Layer type
  *            1. Fully Connected Layer type
@@ -91,6 +99,14 @@ typedef enum {
 * @brief     type for the Optimizer to save hyper-parameter
  */
 typedef struct {
+  weight_decay_type type;
+  float lambda;
+} Weight_Decay_param;
+
+/**
+ * @brief     type for the Optimizer to save hyper-parameter
+ */
+typedef struct {
   opt_type type;
   float learning_rate;
   double beta1;
@@ -99,6 +115,7 @@ typedef struct {
   acti_type activation;
   float decay_rate;
   float decay_steps;
+  Weight_Decay_param weight_decay;
 } Optimizer;
 
 /**
diff --git a/include/neuralnet.h b/include/neuralnet.h
index fa226dc..f0dcf19 100644
@@ -52,10 +52,19 @@ typedef enum { NET_KNN, NET_REG, NET_NEU, NET_UNKNOWN } net_type;
  *            3. ACTI    ( Activation Token )
  *            4. LAYER   ( Layer Token )
  *            5. WEIGHTINI  ( Weight Initialization Token )
- *            6. UNKNOWN
+ *            6. WEIGHT_DECAY  ( Weight Decay Token )
+ *            7. UNKNOWN
  */
-typedef enum { TOKEN_OPT, TOKEN_COST, TOKEN_NET, TOKEN_ACTI, TOKEN_LAYER, TOKEN_WEIGHTINI, TOKEN_UNKNOWN } input_type;
-
+typedef enum {
+  TOKEN_OPT,
+  TOKEN_COST,
+  TOKEN_NET,
+  TOKEN_ACTI,
+  TOKEN_LAYER,
+  TOKEN_WEIGHTINI,
+  TOKEN_WEIGHT_DECAY,
+  TOKEN_UNKNOWN
+} input_type;
 
 /**
  * @class   NeuralNetwork Class
diff --git a/include/tensor.h b/include/tensor.h
index e98c35f..61a610a 100644
@@ -176,6 +176,12 @@ class Tensor {
   Tensor softmax() const;
 
   /**
+   * @brief     l2norm the Tensor elements
+   * @retval    Calculated l2norm
+   */
+  float l2norm() const;
+
+  /**
    * @brief     Normalize the Tensor elements
    * @retval    Calculated Tensor
    */
diff --git a/src/layers.cpp b/src/layers.cpp
index 3a4e148..273517d 100644
@@ -284,6 +284,11 @@ void FullyConnectedLayer::copy(Layer *l) {
 Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   Tensor dJdB = derivative.multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
   Tensor dJdW = Input.transpose().dot(dJdB);
+
+  if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+  }
+
   Tensor ret = dJdB.dot(Weight.transpose());
 
   float ll = opt.learning_rate;
@@ -469,6 +474,10 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
                          .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
                              Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
       loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
+
     } break;
     case COST_MSR: {
       Tensor sub = Y2.subtract(Y);
@@ -479,6 +488,9 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
       }
 
       loss = lossSum / (float)l.getBatch();
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
 
       dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
     } break;
@@ -502,6 +514,11 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
         lossSum += t[i];
       }
       loss = lossSum / (float)l.getBatch();
+
+      if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+        loss += opt.weight_decay.lambda * 0.5 * (Weight.l2norm());
+      }
+
     } break;
     case COST_UNKNOWN:
     default:
@@ -509,6 +526,11 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   }
 
   Tensor dJdW = Input.transpose().dot(dJdB);
+
+  if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+    dJdW = dJdW.subtract(Weight.multiply(opt.weight_decay.lambda));
+  }
+
   ret = dJdB.dot(Weight.transpose());
 
   switch (opt.type) {
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index 9e21407..5c1a8d5 100644
@@ -101,7 +101,14 @@ std::vector<std::string> layer_string = {"InputLayer", "FullyConnectedLayer", "O
  *            "he_normal"  : He Normal Initialization
  *            "he_uniform"  : He Uniform Initialization
  */
-  std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+
+/**
+ * @brief     Weight Decay String from configure file
+ *            "L2Norm"  : squared norm regularization
+ *            "Regression" : Regression
+ */
+std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
 
 /**
  * @brief     Check Existance of File
@@ -190,6 +197,14 @@ unsigned int parseType(std::string ll, input_type t) {
       }
       ret = i - 1;
       break;
+    case TOKEN_WEIGHT_DECAY:
+      for (i = 0; i < weight_decay_string.size(); i++) {
+        if (caseInSensitiveCompare(weight_decay_string[i], ll)) {
+          return (i);
+        }
+      }
+      ret = i - 1;
+      break;
     case TOKEN_UNKNOWN:
     default:
       ret = 3;
@@ -230,6 +245,12 @@ void NeuralNetwork::init() {
   cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
   weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
 
+  opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+
+  if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM){
+    opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+  }
+
   model = iniparser_getstring(ini, "Network:Model", "model.bin");
   batchsize = iniparser_getint(ini, "Network:minibatch", 1);
 
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 79ac2e1..136df9d 100644
@@ -563,6 +563,15 @@ int Tensor::argmax() {
   return index;
 }
 
+float Tensor::l2norm() const {
+  float sum = 0.0;
+  for(int i=0;i<len;i++){
+    sum += this->data[i] * this->data[i];
+  }
+
+  return sqrt(sum);
+}
+
 Tensor Tensor::normalization() const {
   Tensor results(batch, height, width);
   float Min = 1000000.0;
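
For reference, a minimal standalone sketch (not part of the patch) of the arithmetic this commit applies, with plain `std::vector<float>` buffers standing in for the `Tensor` class; the lambda, weight, and gradient values are made up for illustration:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// ||W|| as computed by Tensor::l2norm(): sqrt of the sum of squared elements.
static float l2norm(const std::vector<float> &w) {
  float sum = 0.0f;
  for (float v : w)
    sum += v * v;
  return std::sqrt(sum);
}

// Mirrors dJdW = dJdW.subtract(Weight.multiply(lambda)) in backwarding().
static void applyWeightDecay(std::vector<float> &dJdW,
                             const std::vector<float> &W, float lambda) {
  for (size_t i = 0; i < W.size(); ++i)
    dJdW[i] -= lambda * W[i];
}

int main() {
  std::vector<float> W = {0.5f, -1.0f, 2.0f};     // example weights
  std::vector<float> dJdW = {0.1f, 0.2f, -0.3f};  // example weight gradient
  const float lambda = 0.005f;                    // example decay lambda

  float loss = 1.234f;                // loss from the cost function
  loss += lambda * 0.5f * l2norm(W);  // term added to the reported loss
  applyWeightDecay(dJdW, W, lambda);  // gradient adjustment before the update

  std::printf("loss=%f dJdW[0]=%f\n", loss, dJdW[0]);
  return 0;
}
```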