From abfb72a410d121f76fddbd51a497b483aea6955a Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Thu, 13 Feb 2020 10:50:11 +0900
Subject: [PATCH] Implement Cross Entropy Cost Function

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 Applications/Training/jni/main.cpp     |  2 +-
 Applications/Training/res/Training.ini |  6 +--
 include/layers.h                       |  9 +++--
 src/layers.cpp                         | 72 +++++++++++++++++++++++-----------
 src/neuralnet.cpp                      |  2 +-
 src/tensor.cpp                         |  6 +--
 6 files changed, 62 insertions(+), 35 deletions(-)

diff --git a/Applications/Training/jni/main.cpp b/Applications/Training/jni/main.cpp
index af12f02..9423062 100644
--- a/Applications/Training/jni/main.cpp
+++ b/Applications/Training/jni/main.cpp
@@ -60,7 +60,7 @@
 /**
  * @brief Max Epoch
  */
-#define ITERATION 300
+#define ITERATION 1000
 
 using namespace std;
 
diff --git a/Applications/Training/res/Training.ini b/Applications/Training/res/Training.ini
index 0a4d196..591095d 100644
--- a/Applications/Training/res/Training.ini
+++ b/Applications/Training/res/Training.ini
@@ -4,12 +4,12 @@ Type = NeuralNetwork   # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
          fc1layer \
          outputlayer   #Layers of Neuralnetwork
-Learning_rate = 0.7    # Learning Rate
-Epoch = 300            # Epoch
+Learning_rate = 0.01   # Learning Rate
+Epoch = 100            # Epoch
 Optimizer = sgd        # Optimizer : sgd (stochastic gradien decent),
                        #             adam (Adamtive Moment Estimation)
 Activation = sigmoid   # activation : sigmoid, tanh
-Cost = msr             # Cost(loss) function : msr (mean square root error)
+Cost = cross           # Cost(loss) function : msr (mean square root error)
                        #                       categorical ( for logistic regression )
 Model = "model.bin"    # model path to save / read
 minibatch = 1          # mini batch size
diff --git a/include/layers.h b/include/layers.h
index ffbae7e..12e1265 100644
--- a/include/layers.h
+++ b/include/layers.h
@@ -43,11 +43,12 @@ typedef enum { OPT_SGD, OPT_ADAM, OPT_UNKNOWN } opt_type;
 
 /**
  * @brief Enumeration of cost(loss) function type
- *        0. MSR ( Mean Squared Roots )
- *        1. ENTROPY ( Categorical Cross Entropy )
- *        2. Unknown
+ *        0. CATEGORICAL ( Categorical Cross Entropy )
+ *        1. MSR ( Mean Squared Roots )
+ *        2. ENTROPY ( Cross Entropy )
+ *        3. Unknown
  */
-typedef enum { COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
+typedef enum { COST_CATEGORICAL, COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
 
 /**
  * @brief Enumeration of activation function type
diff --git a/src/layers.cpp b/src/layers.cpp
index f82b20b..38b151f 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -52,7 +52,9 @@ float sigmoid(float x) { return 1 / (1 + exp(-x)); }
  * @brief derivative sigmoid function
  * @param[in] x input
  */
-float sigmoidePrime(float x) { return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001)))); }
+float sigmoidePrime(float x) {
+  return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001))));
+}
 
 /**
  * @brief tanh function for float type
@@ -229,10 +231,7 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
 
 Tensor OutputLayer::forwarding(Tensor input) {
   Input = input;
-  if (cost == COST_ENTROPY)
-    hidden = input.dot(Weight).applyFunction(activation);
-  else
-    hidden = input.dot(Weight).add(Bias).applyFunction(activation);
+  hidden = input.dot(Weight).add(Bias).applyFunction(activation);
   return hidden;
 }
 
@@ -285,7 +284,7 @@ void OutputLayer::setOptimizer(Optimizer opt) {
 Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   float lossSum = 0.0;
   Tensor Y2 = label;
-  Tensor Y = hidden;
+  Tensor Y = hidden.softmax();
   Tensor ret;
   Tensor dJdB;
 
@@ -294,23 +293,50 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
     ll = opt.learning_rate * pow(opt.decay_rate, (iteration / opt.decay_steps));
   }
 
-  if (cost == COST_ENTROPY) {
-    dJdB = Y.subtract(Y2);
-    Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
-                       .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
-                           Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
-    loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
-  } else {
-    Tensor sub = Y2.subtract(Y);
-    Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
-    std::vector<float> t = l.Mat2Vec();
-    for (int i = 0; i < l.getBatch(); i++) {
-      lossSum += t[i];
-    }
-
-    loss = lossSum / (float)l.getBatch();
-
-    dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+  switch (cost) {
+  case COST_CATEGORICAL: {
+    dJdB = Y.subtract(Y2);
+    Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
+                       .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
+                           Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
+    loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+  } break;
+  case COST_MSR: {
+    Tensor sub = Y2.subtract(Y);
+    Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
+    std::vector<float> t = l.Mat2Vec();
+    for (int i = 0; i < l.getBatch(); i++) {
+      lossSum += t[i];
+    }
+
+    loss = lossSum / (float)l.getBatch();
+
+    dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+  } break;
+  case COST_ENTROPY: {
+    if (activation == sigmoid)
+      dJdB = Y.subtract(Y2).multiply(1.0 / Y.getWidth());
+    else
+      dJdB = (Y.subtract(Y2))
+                 .multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime))
+                 .divide(Y.multiply(Y.multiply(-1.0).add(1.0)))
+                 .multiply(1.0 / Y.getWidth());
+
+    Tensor l = (Y2.multiply(Y.applyFunction(log_float))
+                    .add((Y2.multiply(-1.0).add(1.0)).multiply((Y.multiply(-1.0).add(1.0)).applyFunction(log_float))))
+                   .multiply(-1.0 / (Y2.getWidth()))
+                   .sum();
+
+    std::vector<float> t = l.Mat2Vec();
+
+    for (int i = 0; i < l.getBatch(); i++) {
+      lossSum += t[i];
+    }
+    loss = lossSum / (float)l.getBatch();
+  } break;
+  case COST_UNKNOWN:
+  default:
+    break;
   }
 
   Tensor dJdW = Input.transpose().dot(dJdB);
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index c580f7d..ecee386 100644
--- a/src/neuralnet.cpp
+++ b/src/neuralnet.cpp
@@ -66,7 +66,7 @@ std::vector<std::string> Optimizer_string = {"sgd", "adam"};
  * "msr" : Mean Squared Roots
  * "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"msr", "categorical"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
 
 /**
  * @brief Network Type String from configure file
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 9af6800..a031b63 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -119,7 +119,7 @@ Tensor Tensor::add(float const &value) {
     tmp.data[i] = 1.0;
   cblas_saxpy(this->len, value, tmp.data.data(), 1, result.data.data(), 1);
 #else
-  for (int k = 0; k < batch; ++k) {
+  for (int k = 0; k < len; ++k) {
     result.data[k] = data[k] + value;
   }
 #endif
@@ -245,7 +245,7 @@ Tensor Tensor::divide(Tensor const &m) const {
         result.data[b + i + 2] = this->data[b + i + 2] / m.data[i + 2];
         result.data[b + i + 3] = this->data[b + i + 3] / m.data[i + 3];
       }
-      for (int j = i - 1; j < width * height; ++j)
+      for (int j = i; j < width * height; ++j)
        result.data[b + j] = this->data[b + j] / m.data[j];
     }
   } else {
@@ -255,7 +255,7 @@
       result.data[i + 2] = this->data[i + 2] / m.data[i + 2];
       result.data[i + 3] = this->data[i + 3] / m.data[i + 3];
     }
-    for (int j = i - 1; j < len; ++j)
+    for (int j = i; j < len; ++j)
       result.data[j] = this->data[j] / m.data[j];
   }
 
-- 
2.7.4
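
Note (not part of the patch): the new COST_ENTROPY branch computes, per sample of width W, the binary cross entropy loss = -1/W * sum( t*log(y) + (1-t)*log(1-y) ), and for a sigmoid output it uses the simplified gradient (y - t)/W with respect to the pre-activation, which is why activationPrime is skipped on that path. Below is a minimal standalone C++ sketch of that per-element math; the function and variable names (cross_entropy, y, t, eps) are illustrative only and do not exist in the nntrainer sources, and the eps term is added here purely for numerical safety in the sketch.

#include <cmath>
#include <cstdio>
#include <vector>

// loss = -1/W * sum( t*log(y) + (1-t)*log(1-y) )
float cross_entropy(const std::vector<float> &y, const std::vector<float> &t, float eps = 1e-7f) {
  float sum = 0.0f;
  for (size_t i = 0; i < y.size(); ++i)
    sum += t[i] * std::log(y[i] + eps) + (1.0f - t[i]) * std::log(1.0f - y[i] + eps);
  return -sum / static_cast<float>(y.size());
}

// For a sigmoid output, dLoss/dPreActivation reduces to (y - t) / W.
std::vector<float> cross_entropy_grad_sigmoid(const std::vector<float> &y, const std::vector<float> &t) {
  std::vector<float> g(y.size());
  for (size_t i = 0; i < y.size(); ++i)
    g[i] = (y[i] - t[i]) / static_cast<float>(y.size());
  return g;
}

int main() {
  std::vector<float> y = {0.8f, 0.3f}; // network outputs after sigmoid
  std::vector<float> t = {1.0f, 0.0f}; // labels
  std::printf("loss = %f\n", cross_entropy(y, t));
  return 0;
}

In the patch itself the same quantities are built from Tensor operations (multiply, add, applyFunction(log_float), sum), accumulated over the batch, and divided by getBatch() to report the mean loss.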