From abfb72a410d121f76fddbd51a497b483aea6955a Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Thu, 13 Feb 2020 10:50:11 +0900
Subject: [PATCH] Implement Cross Entropy Cost Function

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 Applications/Training/jni/main.cpp     |  2 +-
 Applications/Training/res/Training.ini |  6 +--
 include/layers.h                       |  9 +++--
 src/layers.cpp                         | 72 +++++++++++++++++++++++-----------
 src/neuralnet.cpp                      |  2 +-
 src/tensor.cpp                         |  6 +--
 6 files changed, 62 insertions(+), 35 deletions(-)

diff --git a/Applications/Training/jni/main.cpp b/Applications/Training/jni/main.cpp
index af12f02..9423062 100644
--- a/Applications/Training/jni/main.cpp
+++ b/Applications/Training/jni/main.cpp
@@ -60,7 +60,7 @@
 /**
  * @brief Max Epoch
  */
-#define ITERATION 300
+#define ITERATION 1000
 
 using namespace std;
 
diff --git a/Applications/Training/res/Training.ini b/Applications/Training/res/Training.ini
index 0a4d196..591095d 100644
--- a/Applications/Training/res/Training.ini
+++ b/Applications/Training/res/Training.ini
@@ -4,12 +4,12 @@ Type = NeuralNetwork   # Network Type : Regression, KNN, NeuralNetwork
 Layers = inputlayer \
          fc1layer \
          outputlayer   #Layers of Neuralnetwork
-Learning_rate = 0.7    # Learning Rate
-Epoch = 300            # Epoch
+Learning_rate = 0.01   # Learning Rate
+Epoch = 100            # Epoch
 Optimizer = sgd        # Optimizer : sgd (stochastic gradien decent),
                        #             adam (Adamtive Moment Estimation)
 Activation = sigmoid   # activation : sigmoid, tanh
-Cost = msr             # Cost(loss) function : msr (mean square root error)
+Cost = cross           # Cost(loss) function : msr (mean square root error)
                        #                       categorical ( for logistic regression )
 Model = "model.bin"    # model path to save / read
 minibatch = 1          # mini batch size
diff --git a/include/layers.h b/include/layers.h
index ffbae7e..12e1265 100644
--- a/include/layers.h
+++ b/include/layers.h
@@ -43,11 +43,12 @@ typedef enum { OPT_SGD, OPT_ADAM, OPT_UNKNOWN } opt_type;
 
 /**
  * @brief Enumeration of cost(loss) function type
- *        0. MSR ( Mean Squared Roots )
- *        1. ENTROPY ( Categorical Cross Entropy )
- *        2. Unknown
+ *        0. CATEGORICAL ( Categorical Cross Entropy )
+ *        1. MSR ( Mean Squared Roots )
+ *        2. ENTROPY ( Cross Entropy )
+ *        3. Unknown
  */
-typedef enum { COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
+typedef enum { COST_CATEGORICAL, COST_MSR, COST_ENTROPY, COST_UNKNOWN } cost_type;
 
 /**
  * @brief Enumeration of activation function type
diff --git a/src/layers.cpp b/src/layers.cpp
index f82b20b..38b151f 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -52,7 +52,9 @@ float sigmoid(float x) { return 1 / (1 + exp(-x)); }
  * @brief derivative sigmoid function
  * @param[in] x input
  */
-float sigmoidePrime(float x) { return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001)))); }
+float sigmoidePrime(float x) {
+  return (float)(1.0 / ((1 + exp(-x)) * (1.0 + 1.0 / (exp(-x) + 0.0000001))));
+}
 
 /**
  * @brief tanh function for float type
@@ -229,10 +231,7 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
 
 Tensor OutputLayer::forwarding(Tensor input) {
   Input = input;
-  if (cost == COST_ENTROPY)
-    hidden = input.dot(Weight).applyFunction(activation);
-  else
-    hidden = input.dot(Weight).add(Bias).applyFunction(activation);
+  hidden = input.dot(Weight).add(Bias).applyFunction(activation);
   return hidden;
 }
 
@@ -285,7 +284,7 @@ void OutputLayer::setOptimizer(Optimizer opt) {
 Tensor OutputLayer::backwarding(Tensor label, int iteration) {
   float lossSum = 0.0;
   Tensor Y2 = label;
-  Tensor Y = hidden;
+  Tensor Y = hidden.softmax();
   Tensor ret;
   Tensor dJdB;
 
@@ -294,23 +293,50 @@ Tensor OutputLayer::backwarding(Tensor label, int iteration) {
     ll = opt.learning_rate * pow(opt.decay_rate, (iteration / opt.decay_steps));
   }
 
-  if (cost == COST_ENTROPY) {
-    dJdB = Y.subtract(Y2);
-    Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
-                       .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
-                           Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
-    loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
-  } else {
-    Tensor sub = Y2.subtract(Y);
-    Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
-    std::vector<float> t = l.Mat2Vec();
-    for (int i = 0; i < l.getBatch(); i++) {
-      lossSum += t[i];
-    }
-
-    loss = lossSum / (float)l.getBatch();
-
-    dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+  switch (cost) {
+  case COST_CATEGORICAL: {
+    dJdB = Y.subtract(Y2);
+    Tensor temp = ((Y2.multiply(-1.0).transpose().dot(Y.add(opt.epsilon).applyFunction(log_float)))
+                       .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
+                           Y.multiply(-1.0).add(1.0).add(opt.epsilon).applyFunction(log_float))));
+    loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+  } break;
+  case COST_MSR: {
+    Tensor sub = Y2.subtract(Y);
+    Tensor l = (sub.multiply(sub)).sum().multiply(0.5);
+    std::vector<float> t = l.Mat2Vec();
+    for (int i = 0; i < l.getBatch(); i++) {
+      lossSum += t[i];
+    }
+
+    loss = lossSum / (float)l.getBatch();
+
+    dJdB = Y.subtract(Y2).multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+  } break;
+  case COST_ENTROPY: {
+    if (activation == sigmoid)
+      dJdB = Y.subtract(Y2).multiply(1.0 / Y.getWidth());
+    else
+      dJdB = (Y.subtract(Y2))
+                 .multiply(Input.dot(Weight).add(Bias).applyFunction(activationPrime))
+                 .divide(Y.multiply(Y.multiply(-1.0).add(1.0)))
+                 .multiply(1.0 / Y.getWidth());
+
+    Tensor l = (Y2.multiply(Y.applyFunction(log_float))
+                    .add((Y2.multiply(-1.0).add(1.0)).multiply((Y.multiply(-1.0).add(1.0)).applyFunction(log_float))))
+                   .multiply(-1.0 / (Y2.getWidth()))
+                   .sum();
+
+    std::vector<float> t = l.Mat2Vec();
+
+    for (int i = 0; i < l.getBatch(); i++) {
+      lossSum += t[i];
+    }
+    loss = lossSum / (float)l.getBatch();
+  } break;
+  case COST_UNKNOWN:
+  default:
+    break;
   }
 
   Tensor dJdW = Input.transpose().dot(dJdB);
diff --git a/src/neuralnet.cpp b/src/neuralnet.cpp
index c580f7d..ecee386 100644
--- a/src/neuralnet.cpp
+++ b/src/neuralnet.cpp
@@ -66,7 +66,7 @@ std::vector<std::string> Optimizer_string = {"sgd", "adam"};
  * "msr" : Mean Squared Roots
  * "caterogical" : Categorical Cross Entropy
  */
-std::vector<std::string> Cost_string = {"msr", "categorical"};
+std::vector<std::string> Cost_string = {"categorical", "msr", "cross"};
 
 /**
  * @brief Network Type String from configure file
diff --git a/src/tensor.cpp b/src/tensor.cpp
index 9af6800..a031b63 100644
--- a/src/tensor.cpp
+++ b/src/tensor.cpp
@@ -119,7 +119,7 @@ Tensor Tensor::add(float const &value) {
     tmp.data[i] = 1.0;
   cblas_saxpy(this->len, value, tmp.data.data(), 1, result.data.data(), 1);
 #else
-  for (int k = 0; k < batch; ++k) {
+  for (int k = 0; k < len; ++k) {
     result.data[k] = data[k] + value;
   }
 #endif
@@ -245,7 +245,7 @@ Tensor Tensor::divide(Tensor const &m) const {
         result.data[b + i + 2] = this->data[b + i + 2] / m.data[i + 2];
         result.data[b + i + 3] = this->data[b + i + 3] / m.data[i + 3];
       }
-      for (int j = i - 1; j < width * height; ++j)
+      for (int j = i; j < width * height; ++j)
        result.data[b + j] = this->data[b + j] / m.data[j];
     }
   } else {
@@ -255,7 +255,7 @@
       result.data[i + 2] = this->data[i + 2] / m.data[i + 2];
       result.data[i + 3] = this->data[i + 3] / m.data[i + 3];
     }
-    for (int j = i - 1; j < len; ++j)
+    for (int j = i; j < len; ++j)
       result.data[j] = this->data[j] / m.data[j];
   }
 
-- 
2.7.4
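
Note (not part of the patch): the new COST_ENTROPY branch computes, per sample of width W, the binary cross entropy loss = -1/W * sum( t*log(y) + (1-t)*log(1-y) ), and for a sigmoid output it uses the simplified gradient (y - t)/W with respect to the pre-activation, which is why activationPrime is skipped on that path. Below is a minimal standalone C++ sketch of that per-element math; the function and variable names (cross_entropy, y, t, eps) are illustrative only and do not exist in the nntrainer sources, and the eps term is added here purely for numerical safety in the sketch.

#include <cmath>
#include <cstdio>
#include <vector>

// loss = -1/W * sum( t*log(y) + (1-t)*log(1-y) )
float cross_entropy(const std::vector<float> &y, const std::vector<float> &t, float eps = 1e-7f) {
  float sum = 0.0f;
  for (size_t i = 0; i < y.size(); ++i)
    sum += t[i] * std::log(y[i] + eps) + (1.0f - t[i]) * std::log(1.0f - y[i] + eps);
  return -sum / static_cast<float>(y.size());
}

// For a sigmoid output, dLoss/dPreActivation reduces to (y - t) / W.
std::vector<float> cross_entropy_grad_sigmoid(const std::vector<float> &y, const std::vector<float> &t) {
  std::vector<float> g(y.size());
  for (size_t i = 0; i < y.size(); ++i)
    g[i] = (y[i] - t[i]) / static_cast<float>(y.size());
  return g;
}

int main() {
  std::vector<float> y = {0.8f, 0.3f}; // network outputs after sigmoid
  std::vector<float> t = {1.0f, 0.0f}; // labels
  std::printf("loss = %f\n", cross_entropy(y, t));
  return 0;
}

In the patch itself the same quantities are built from Tensor operations (multiply, add, applyFunction(log_float), sum), accumulated over the batch, and divided by getBatch() to report the mean loss.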