typedef enum { ACT_TANH, ACT_SIGMOID, ACT_RELU, ACT_UNKNOWN } acti_type;
/**
+ * @brief Enumeration of Weight Decay type
+ * 0. L2Norm
+ * 1. Regression
+ * 2. Unknown
+ */
+typedef enum { WEIGHT_DECAY_L2NORM, WEIGHT_DECAY_REGRESSION, WEIGHT_DECAY_UNKNOWN } weight_decay_type;
+
+/**
* @brief Enumeration of layer type
* 0. Input Layer type
* 1. Fully Connected Layer type
- * @brief type for the Optimizor to save hyper-parameter
+ * @brief type for the Weight Decay hyper-parameters
*/
typedef struct {
+ weight_decay_type type;
+ float lambda;
+} Weight_Decay_param;
+
+/**
+ * @brief type for the Optimizer to save hyper-parameters
+ */
+typedef struct {
opt_type type;
float learning_rate;
double beta1;
acti_type activation;
float decay_rate;
float decay_steps;
+ Weight_Decay_param weight_decay;
} Optimizer;
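
A minimal usage sketch (only the weight_decay members come from this patch; OPT_SGD stands in for whichever opt_type value the caller picks, and the numeric values are placeholders):

Optimizer opt;
opt.type = OPT_SGD;                          // assumed opt_type value
opt.learning_rate = 0.01f;
opt.weight_decay.type = WEIGHT_DECAY_L2NORM; // enable the L2 penalty
opt.weight_decay.lambda = 0.0001f;           // example decay strength
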
/**
* 3. ACTI ( Activation Token )
* 4. LAYER ( Layer Token )
* 5. WEIGHTINI ( Weight Initialization Token )
- * 6. UNKNOWN
+ * 6. WEIGHT_DECAY ( Weight Decay Token )
+ * 7. UNKNOWN
*/
-typedef enum { TOKEN_OPT, TOKEN_COST, TOKEN_NET, TOKEN_ACTI, TOKEN_LAYER, TOKEN_WEIGHTINI, TOKEN_UNKNOWN } input_type;
-
+typedef enum {
+ TOKEN_OPT,
+ TOKEN_COST,
+ TOKEN_NET,
+ TOKEN_ACTI,
+ TOKEN_LAYER,
+ TOKEN_WEIGHTINI,
+ TOKEN_WEIGHT_DECAY,
+ TOKEN_UNKNOWN
+} input_type;
/**
* @class NeuralNetwork Class
Tensor softmax() const;
/**
+ * @brief Compute the L2 norm of the Tensor elements
+ * @retval Calculated L2 norm
+ */
+ float l2norm() const;
+
+ /**
* @brief Normalize the Tensor elements
* @retval Calculated Tensor
*/
Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
Tensor dJdB = derivative.multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
Tensor dJdW = Input.transpose().dot(dJdB);
+
+ if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+ // the penalty's gradient is lambda * W; it is added so the update shrinks the weights
+ dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
+ }
+
Tensor ret = dJdB.dot(Weight.transpose());
float ll = opt.learning_rate;
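
For reference, the decay term has to be added to dJdW under this code's convention (assuming the elided switch (opt.type) below applies an update of the form Weight = Weight - ll * dJdW): with the L2 penalty the objective becomes

J_{reg}(W) = J(W) + \tfrac{\lambda}{2}\lVert W\rVert^{2}
\qquad\Rightarrow\qquad
\frac{\partial J_{reg}}{\partial W} = \frac{\partial J}{\partial W} + \lambda W

so each step moves W by -ll * (dJdW + lambda * W), shrinking every weight toward zero; that shrinkage is the "decay".
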
.subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+ if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+ // report the (lambda / 2) * ||W||^2 penalty that matches the lambda * W gradient term
+ float w_norm = Weight.l2norm();
+ loss += opt.weight_decay.lambda * 0.5 * w_norm * w_norm;
+ }
+
} break;
case COST_MSR: {
Tensor sub = Y2.subtract(Y);
}
loss = lossSum / (float)l.getBatch();
+ if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+ // same (lambda / 2) * ||W||^2 penalty as above
+ float w_norm = Weight.l2norm();
+ loss += opt.weight_decay.lambda * 0.5 * w_norm * w_norm;
+ }
dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
} break;
lossSum += t[i];
}
loss = lossSum / (float)l.getBatch();
+
+ if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+ // same (lambda / 2) * ||W||^2 penalty as above
+ float w_norm = Weight.l2norm();
+ loss += opt.weight_decay.lambda * 0.5 * w_norm * w_norm;
+ }
+
} break;
case COST_UNKNOWN:
default:
}
Tensor dJdW = Input.transpose().dot(dJdB);
+
+ if (opt.weight_decay.type == WEIGHT_DECAY_L2NORM) {
+ // same decay gradient (lambda * W) as in FullyConnectedLayer::backwarding
+ dJdW = dJdW.add(Weight.multiply(opt.weight_decay.lambda));
+ }
+
ret = dJdB.dot(Weight.transpose());
switch (opt.type) {
* "he_normal" : He Normal Initialization
* "he_uniform" : He Uniform Initialization
*/
- std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+std::vector<std::string> weightini_string = {"lecun_normal", "lecun_uniform", "xavier_normal", "xavier_uniform", "he_normal", "he_uniform"};
+
+/**
+ * @brief Weight Decay String from configure file
+ * "L2Norm" : squared norm regularization
+ * "Regression" : Regression
+ */
+std::vector<std::string> weight_decay_string = {"L2Norm", "Regression"};
/**
* @brief Check Existance of File
}
ret = i - 1;
break;
+ case TOKEN_WEIGHT_DECAY:
+ for (i = 0; i < weight_decay_string.size(); i++) {
+ if (caseInSensitiveCompare(weight_decay_string[i], ll)) {
+ return (i);
+ }
+ }
+ ret = i - 1;
+ break;
case TOKEN_UNKNOWN:
default:
ret = 3;
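
Assuming parseType's signature matches the call sites below, the new TOKEN_WEIGHT_DECAY case makes a lookup like the following resolve through the case-insensitive compare (hypothetical example):

int t = parseType("l2norm", TOKEN_WEIGHT_DECAY); // 0, i.e. WEIGHT_DECAY_L2NORM
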
cost = (Layers::cost_type)parseType(iniparser_getstring(ini, "Network:Cost", NULL), TOKEN_COST);
weightini = (Layers::weightIni_type)parseType(iniparser_getstring(ini, "Network:WeightIni", "xavier_normal"), TOKEN_WEIGHTINI);
+ opt.weight_decay.type = (Layers::weight_decay_type)parseType(iniparser_getstring(ini, "Network:Weight_Decay", NULL), TOKEN_WEIGHT_DECAY);
+
+ if (opt.weight_decay.type == Layers::WEIGHT_DECAY_L2NORM) {
+ opt.weight_decay.lambda = iniparser_getdouble(ini, "Network:weight_decay_lambda", 0.0);
+ }
+
model = iniparser_getstring(ini, "Network:Model", "model.bin");
batchsize = iniparser_getint(ini, "Network:minibatch", 1);
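
A configuration excerpt that would exercise the Weight_Decay handling above; iniparser reads "Network:Weight_Decay" as key Weight_Decay inside section [Network], and the lambda value here is only an example:

[Network]
Weight_Decay = L2Norm
weight_decay_lambda = 0.0001
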
return index;
}
+float Tensor::l2norm() const {
+  float sum = 0.0;
+  for (int i = 0; i < len; i++) {
+    sum += this->data[i] * this->data[i];
+  }
+
+  return sqrt(sum);
+}
+
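A self-contained check of the same arithmetic outside the Tensor class: for the values {1, 2, 2} the norm is sqrt(1 + 4 + 4) = 3, so with lambda = 0.1 the reported penalty 0.5 * lambda * ||W||^2 is 0.45.

#include <math.h>
#include <stdio.h>

int main(void) {
  float data[] = {1.0f, 2.0f, 2.0f};
  float sum = 0.0f;
  for (int i = 0; i < 3; i++)
    sum += data[i] * data[i]; // same accumulation as Tensor::l2norm
  printf("l2norm  = %f\n", sqrt(sum));       // 3.000000
  printf("penalty = %f\n", 0.5 * 0.1 * sum); // 0.450000
  return 0;
}
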
Tensor Tensor::normalization() const {
Tensor results(batch, height, width);
float Min = 1000000.0;