Add Logistic Regression
author jijoong.moon <jijoong.moon@samsung.com>
Tue, 26 Nov 2019 01:20:16 +0000 (10:20 +0900)
committer Jijoong Moon/On-Device Lab(SR)/Principal Engineer/Samsung Electronics <jijoong.moon@samsung.com>
Tue, 26 Nov 2019 01:28:45 +0000 (10:28 +0900)
Add Logistic Regression with an example application

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
12 files changed:
LogisticRegression/README.md [new file with mode: 0644]
LogisticRegression/jni/Android.mk [new file with mode: 0644]
LogisticRegression/jni/Application.mk [new file with mode: 0644]
LogisticRegression/jni/CMakeLists.txt [new file with mode: 0644]
LogisticRegression/jni/main.cpp [new file with mode: 0644]
LogisticRegression/res/LogisticRegression.ini [new file with mode: 0644]
LogisticRegression/res/dataset1.txt [new file with mode: 0644]
LogisticRegression/res/test.txt [new file with mode: 0644]
NeuralNet/include/layers.h
NeuralNet/include/neuralnet.h
NeuralNet/layers.cpp
NeuralNet/neuralnet.cpp

diff --git a/LogisticRegression/README.md b/LogisticRegression/README.md
new file mode 100644 (file)
index 0000000..8fe60f2
--- /dev/null
@@ -0,0 +1,2 @@
+# Logistic Regression
+
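Before the build files and sources, a quick recap of the model this example trains. With sigmoid activation and the bias skipped on the logistic path (see the COST_LOGISTIC branch in NeuralNet/layers.cpp below), the forward pass and the logistic cost — including the 1e-5 stabilizer the code adds inside the logarithms — are:

    \hat{y} = \sigma(xW), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}

    J(W) = -\frac{1}{N} \sum_{i=1}^{N} \left[ y_i \log(\hat{y}_i + 10^{-5})
           + (1 - y_i) \log(1 - \hat{y}_i + 10^{-5}) \right]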
diff --git a/LogisticRegression/jni/Android.mk b/LogisticRegression/jni/Android.mk
new file mode 100644 (file)
index 0000000..0327da9
--- /dev/null
@@ -0,0 +1,42 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+# ndk path
+ifndef ANDROID_NDK
+$(error ANDROID_NDK is not defined!)
+endif
+
+INIPARSER=../../iniparser/src/
+LOCAL_MODULE :=iniparser
+INIPARSER_SRCS := \
+       $(INIPARSER)/iniparser.c \
+       $(INIPARSER)/dictionary.c
+
+LOCAL_SRC_FILES :=$(INIPARSER_SRCS)
+LOCAL_C_INCLUDES := $(INIPARSER)
+
+LOCAL_CFLAGS += -O3 -DNDEBUG
+
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+NEURALNET=../../NeuralNet
+LOCAL_ARM_NEON := true
+LOCAL_CFLAGS += -std=c++11 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
+LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/arm64-v8a/
+LOCAL_CXXFLAGS += -std=c++11
+LOCAL_CFLAGS += -pthread -fopenmp
+LOCAL_LDFLAGS += -fopenmp
+LOCAL_MODULE_TAGS := optional
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE := LogisticRegression
+
+LOCAL_SRC_FILES := main.cpp  $(NEURALNET)/matrix.cpp $(NEURALNET)/neuralnet.cpp \
+                  $(NEURALNET)/layers.cpp
+
+LOCAL_STATIC_LIBRARIES := iniparser
+
+LOCAL_C_INCLUDES += $(INIPARSER) $(NEURALNET)
+
+include $(BUILD_EXECUTABLE)
diff --git a/LogisticRegression/jni/Application.mk b/LogisticRegression/jni/Application.mk
new file mode 100644 (file)
index 0000000..228f653
--- /dev/null
@@ -0,0 +1,3 @@
+APP_ABI = arm64-v8a
+APP_STL = c++_shared
+APP_PLATFORM=android-24
diff --git a/LogisticRegression/jni/CMakeLists.txt b/LogisticRegression/jni/CMakeLists.txt
new file mode 100644 (file)
index 0000000..22153bf
--- /dev/null
@@ -0,0 +1,30 @@
+cmake_minimum_required(VERSION 2.8.3)
+project(LogisticRegression)
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -g -pthread")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -g -std=c++11 -pthread")
+
+set(NEURALNET ../../NeuralNet)
+set(INIPARSER ../../iniparser/src)
+
+include_directories(
+                    ${CMAKE_CURRENT_SOURCE_DIR}
+                    ${NEURALNET}
+                    ${INIPARSER}
+                    )
+
+set(SRCS
+       main.cpp
+       ${NEURALNET}/neuralnet.cpp
+       ${NEURALNET}/matrix.cpp
+       ${NEURALNET}/layers.cpp
+       ${INIPARSER}/iniparser.c
+       ${INIPARSER}/dictionary.c
+       )
+
+add_executable(LogisticRegression ${SRCS})
+target_link_libraries( LogisticRegression ${CMAKE_DL_LIBS})
+
+install (TARGETS LogisticRegression
+       RUNTIME DESTINATION bin
+       )
diff --git a/LogisticRegression/jni/main.cpp b/LogisticRegression/jni/main.cpp
new file mode 100644 (file)
index 0000000..b1e26ea
--- /dev/null
@@ -0,0 +1,84 @@
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <stdlib.h>
+#include <time.h>
+
+#include "include/matrix.h"
+#include "include/neuralnet.h"
+#define training false // true: train and save the model, false: load it and run inference
+
+std::string data_file;
+
+double stepFunction(double x) { // harden sigmoid output: >0.5 -> 1, <0.5 -> 0, exactly 0.5 stays
+  if (x > 0.5) {
+    return 1.0;
+  }
+
+  if (x < 0.5) {
+    return 0.0;
+  }
+
+  return x;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    std::cout << "Usage: ./LogisticRegression Config.ini data.txt\n";
+    exit(1); // missing arguments is an error; exit non-zero
+  }
+
+  const std::vector<std::string> args(argv + 1, argv + argc);
+  std::string config = args[0];
+  data_file = args[1];
+
+  srand(time(NULL));
+
+  std::vector<std::vector<double>> inputVector, outputVector;
+  Network::NeuralNetwork NN(config);
+
+  NN.init();
+  if (!training)
+    NN.readModel();
+
+  std::ifstream dataFile(data_file);
+  if (dataFile.is_open()) {
+    std::string temp;
+    int index = 0;
+    while (std::getline(dataFile, temp)) {
+      if (training && index % 10 == 1) { // hold out every 10th line as test data
+        std::cout << temp << std::endl;
+        index++;
+        continue;
+      }
+      std::istringstream buffer(temp);
+      std::vector<double> line;
+      std::vector<double> out;
+      double x;
+      for (int i = 0; i < 2; i++) {
+        buffer >> x;
+        line.push_back(x);
+      }
+      inputVector.push_back(line);
+      buffer >> x;
+      out.push_back(x);
+      outputVector.push_back(out);
+      index++;
+    }
+  }
+  if (training) {
+    for (unsigned int i = 0; i < NN.getEpoch(); i++) {
+      NN.backwarding(Matrix(inputVector), Matrix(outputVector), i);
+      std::cout << "#" << i + 1 << "/" << NN.getEpoch()
+                << " - Loss : " << NN.getLoss() << std::endl;
+      NN.setLoss(0.0);
+    }
+  } else {
+    std::cout << NN.forwarding(Matrix(inputVector)).applyFunction(stepFunction)
+              << std::endl;
+  }
+
+  NN.saveModel();
+  NN.finalize();
+}
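main.cpp above delegates all of the numerics to the NeuralNetwork class. For reference, here is a minimal, dependency-free sketch of the same computation — batch gradient descent on two-feature logistic regression, with the bias omitted to mirror the COST_LOGISTIC forwarding path in layers.cpp. Everything here is illustrative; it is not the NeuralNet API:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Toy samples in the same "x1 x2 label" layout as res/dataset1.txt.
  std::vector<std::vector<double>> X = {
      {4.5192, 2.6487}, {2.4443, 1.5438}, {2.7227, 4.5822}, {1.9383, 3.6549}};
  std::vector<double> y = {1.0, 1.0, 0.0, 0.0};

  double w[2] = {0.0, 0.0};
  const double lr = 0.01; // Learning_rate in LogisticRegression.ini
  const int epochs = 100; // Epoch in LogisticRegression.ini

  for (int e = 0; e < epochs; e++) {
    double g[2] = {0.0, 0.0};
    double loss = 0.0;
    for (size_t i = 0; i < X.size(); i++) {
      double z = w[0] * X[i][0] + w[1] * X[i][1]; // input.dot(Weight), no bias
      double yhat = 1.0 / (1.0 + std::exp(-z));   // sigmoid activation
      double d = yhat - y[i];                     // dJ/dz for the logistic cost
      g[0] += d * X[i][0];
      g[1] += d * X[i][1];
      loss += -(y[i] * std::log(yhat + 1e-5) +
                (1.0 - y[i]) * std::log(1.0 - yhat + 1e-5));
    }
    w[0] -= lr * g[0] / X.size();
    w[1] -= lr * g[1] / X.size();
    if ((e + 1) % 25 == 0)
      std::printf("#%d/%d - Loss : %f\n", e + 1, epochs, loss / X.size());
  }
  return 0;
}

The d = yhat - y[i] step is the same simplification the COST_LOGISTIC branch of OutputLayer::backwarding relies on; a short derivation follows that hunk below.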
diff --git a/LogisticRegression/res/LogisticRegression.ini b/LogisticRegression/res/LogisticRegression.ini
new file mode 100644 (file)
index 0000000..c0a7e05
--- /dev/null
@@ -0,0 +1,29 @@
+# Network Section : Network
+[Network]
+Type = Regression      # Network Type : Regression, KNN, NeuralNetwork
+Layers = inputlayer \
+        outputlayer    # Layers of the neural network
+Learning_rate = 0.01   # Learning Rate
+Epoch = 100            # Epoch
+Optimizer = sgd        # Optimizer : sgd (stochastic gradient descent),
+                       #             adam (Adaptive Moment Estimation)
+Activation = sigmoid   # Activation : sigmoid, tanh
+Cost = logistic        # Cost (loss) function : msr (mean squared error),
+                       #                        logistic (for logistic regression)
+Model = "model.bin"    # model path to save / read
+minibatch = 1          # mini batch size
+
+# Layer Section : Name
+[inputlayer]
+Type = InputLayer
+Id = 0                 # Layer Id
+Height = 10
+Width = 2              # Input Layer Dimension
+Bias_zero = true       # Zero Bias
+
+[outputlayer]
+Type = OutputLayer
+Id = 1
+Height = 2             # Weight Height
+Width = 1              # Weight Width
+Bias_zero = true
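How these keys are consumed: NeuralNetwork::init() (see the neuralnet.cpp hunks below) looks them up through iniparser with "section:key" style keys, e.g. iniparser_getint(ini, "outputlayer:Id", 0). A standalone sketch of the same pattern — the file path and fallback defaults here are illustrative:

#include <iostream>
#include "iniparser.h"

int main() {
  // Load the config shipped with this example; the path is illustrative.
  dictionary *ini = iniparser_load("res/LogisticRegression.ini");
  if (ini == NULL)
    return 1;
  int h = iniparser_getint(ini, "outputlayer:Height", 0);  // 2 (weight height)
  int w = iniparser_getint(ini, "outputlayer:Width", 0);   // 1 (weight width)
  int bz = iniparser_getboolean(ini, "outputlayer:Bias_zero", 1);
  std::cout << "weights " << h << "x" << w << ", zero bias: " << bz << std::endl;
  iniparser_freedict(ini);
  return 0;
}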
diff --git a/LogisticRegression/res/dataset1.txt b/LogisticRegression/res/dataset1.txt
new file mode 100644 (file)
index 0000000..8727b12
--- /dev/null
@@ -0,0 +1,100 @@
+4.5192 2.6487 1.0
+2.4443 1.5438 1.0
+4.2409 1.899 1.0
+5.8097 2.4711 1.0
+6.4423 3.359 1.0
+5.8097 3.2406 1.0
+6.3917 3.8128 1.0
+6.8725 4.4441 1.0
+6.7966 3.6747 1.0
+8.163 4.7401 1.0
+7.4038 3.8917 1.0
+7.6316 4.602 1.0
+7.7581 5.7265 1.0
+6.5688 4.9571 1.0
+5.3543 3.9903 1.0
+4.4686 3.0236 1.0
+2.9757 2.0568 1.0
+2.4443 1.2676 1.0
+0.9008 1.169 1.0
+2.1154 1.7411 1.0
+3.2794 1.386 1.0
+4.165 1.5636 1.0
+4.8482 1.8793 1.0
+3.33 2.7868 1.0
+5.1518 3.5563 1.0
+6.2652 4.0693 1.0
+6.2652 4.3849 1.0
+7.2014 1.5438 1.0
+7.6569 2.412 1.0
+6.1387 1.7806 1.0
+4.4939 1.4057 1.0
+4.8735 2.6093 1.0
+5.5314 3.0828 1.0
+6.0121 3.9311 1.0
+7.1508 4.7598 1.0
+7.7075 5.3122 1.0
+8.3148 5.7068 1.0
+8.5172 5.1149 1.0
+8.7449 5.4109 1.0
+7.8593 3.8128 1.0
+6.999 3.2406 1.0
+5.5061 2.9052 1.0
+4.9241 2.6882 1.0
+6.6447 3.8325 1.0
+7.6822 4.5428 1.0
+8.0364 5.7857 1.0
+8.9221 6.5552 1.0
+7.8593 5.253 1.0
+6.5941 5.2333 1.0
+6.0374 4.7598 1.0
+2.7227 4.5822 0.0
+1.9383 3.6549 0.0
+1.6852 2.9841 0.0
+4.3168 4.4244 0.0
+3.4312 3.7536 0.0
+5.4808 5.2728 0.0
+4.1144 4.8387 0.0
+3.2034 4.4244 0.0
+4.1144 5.3911 0.0
+5.1012 6.0817 0.0
+4.8988 5.5687 0.0
+5.9615 6.4565 0.0
+5.7591 6.0028 0.0
+6.6953 6.7722 0.0
+5.7338 6.6538 0.0
+6.6194 7.1471 0.0
+7.2014 7.5219 0.0
+7.2014 6.8314 0.0
+8.5931 7.6206 0.0
+7.7581 7.1865 0.0
+7.7581 7.7784 0.0
+5.1012 7.6009 0.0
+4.2156 6.496 0.0
+3.4818 5.8055 0.0
+2.3684 5.0163 0.0
+1.7864 4.1876 0.0
+0.9008 3.4379 0.0
+0.9008 5.7857 0.0
+1.9636 6.3382 0.0
+1.4069 4.9571 0.0
+2.419 6.8511 0.0
+2.8745 6.0817 0.0
+4.0132 7.1668 0.0
+4.6711 7.226 0.0
+5.1771 8.1533 0.0
+6.2146 7.4825 0.0
+5.4555 7.0484 0.0
+5.9868 8.5084 0.0
+4.0891 7.5417 0.0
+2.3937 7.2063 0.0
+1.331 6.5355 0.0
+1.7358 5.4503 0.0
+2.4443 5.8449 0.0
+3.1781 4.8979 0.0
+4.6711 5.8055 0.0
+5.9868 7.3641 0.0
+4.6711 6.2592 0.0
+7.581 8.3703 0.0
+4.6457 8.5676 0.0
+4.6457 8.1676 0.0
diff --git a/LogisticRegression/res/test.txt b/LogisticRegression/res/test.txt
new file mode 100644 (file)
index 0000000..38a9858
--- /dev/null
@@ -0,0 +1,10 @@
+2.4443 1.5438 1.0
+7.6316 4.602 1.0
+4.165 1.5636 1.0
+4.8735 2.6093 1.0
+5.5061 2.9052 1.0
+1.9383 3.6549 0.0
+5.9615 6.4565 0.0
+5.1012 7.6009 0.0
+2.8745 6.0817 0.0
+1.7358 5.4503 0.0
diff --git a/NeuralNet/include/layers.h b/NeuralNet/include/layers.h
index 7ff780c..a1c00cd 100644 (file)
@@ -55,7 +55,7 @@ public:
   InputLayer(){};
   ~InputLayer(){};
   void read(std::ifstream &file){};
-  void save(std::ofstream &file){};
+  void save(std::ofstream &file) { return; };
   Matrix backwarding(Matrix input, int iteration) { return Input; };
   Matrix forwarding(Matrix input);
   void setOptimizer(Optimizer opt);
@@ -93,6 +93,7 @@ public:
   void setOptimizer(Optimizer opt);
   void initialize(int b, int w, int h, int id, bool init_zero);
   double getLoss() { return loss; }
+  void setCost(cost_type c) { this->cost = c; };
   void copy(Layer *l);
 
 private:
@@ -101,6 +102,7 @@ private:
   Matrix M;
   Matrix V;
   double loss;
+  cost_type cost;
 };
 }
 
diff --git a/NeuralNet/include/neuralnet.h b/NeuralNet/include/neuralnet.h
index c4f20f2..dbdf076 100644 (file)
@@ -35,6 +35,7 @@ public:
   void saveModel();
   void readModel();
   void setConfig(std::string config_path);
+  unsigned int getEpoch() { return epoch; };
   NeuralNetwork &copy(NeuralNetwork &from);
   void finalize();
 
diff --git a/NeuralNet/layers.cpp b/NeuralNet/layers.cpp
index 45559b3..20ddf57 100644 (file)
@@ -159,6 +159,7 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
   this->init_zero = init_zero;
   Weight = Matrix(h, w);
   Bias = Matrix(1, w);
+  this->cost = cost; // self-assignment: initialize() takes no cost argument; the value is set via setCost()
 
   Weight = Weight.applyFunction(random);
   if (init_zero) {
@@ -170,7 +171,10 @@ void OutputLayer::initialize(int b, int h, int w, int id, bool init_zero) {
 
 Matrix OutputLayer::forwarding(Matrix input) {
   Input = input;
-  hidden = input.dot(Weight).add(Bias).applyFunction(activation);
+  if (cost == COST_LOGISTIC)
+    hidden = input.dot(Weight).applyFunction(activation);
+  else
+    hidden = input.dot(Weight).add(Bias).applyFunction(activation);
   return hidden;
 }
 
@@ -224,18 +228,30 @@ Matrix OutputLayer::backwarding(Matrix label, int iteration) {
   double lossSum = 0.0;
   Matrix Y2 = label;
   Matrix Y = hidden;
-  Matrix sub = Y2.subtract(Y);
-  Matrix l = (sub.multiply(sub)).sum().multiply(0.5);
   Matrix ret;
-  std::vector<double> t = l.Mat2Vec();
-  for (int i = 0; i < l.getBatch(); i++) {
-    lossSum += t[i];
-  }
+  Matrix dJdB;
+
+  if (cost == COST_LOGISTIC) {
+    dJdB = Y.subtract(Y2);
+    Matrix temp =
+        ((Y2.multiply(-1.0).transpose().dot(Y.add(1e-5).applyFunction(log)))
+             .subtract(Y2.multiply(-1.0).add(1.0).transpose().dot(
+                 Y.multiply(-1.0).add(1.0).add(1e-5).applyFunction(log))));
+    loss = (1.0 / Y.Mat2Vec().size()) * temp.Mat2Vec()[0];
+  } else {
+    Matrix sub = Y2.subtract(Y);
+    Matrix l = (sub.multiply(sub)).sum().multiply(0.5);
+    std::vector<double> t = l.Mat2Vec();
+    for (int i = 0; i < l.getBatch(); i++) {
+      lossSum += t[i];
+    }
 
-  loss = lossSum / (double)l.getBatch();
+    loss = lossSum / (double)l.getBatch();
+
+    dJdB = Y.subtract(Y2).multiply(
+        Input.dot(Weight).add(Bias).applyFunction(activationPrime));
+  }
 
-  Matrix dJdB = Y.subtract(Y2).multiply(
-      Input.dot(Weight).add(Bias).applyFunction(activationPrime));
   Matrix dJdW = Input.transpose().dot(dJdB);
   ret = dJdB.dot(Weight.transpose());
 
diff --git a/NeuralNet/neuralnet.cpp b/NeuralNet/neuralnet.cpp
index f7c7832..72285e6 100644 (file)
@@ -5,6 +5,19 @@
 #include <sstream>
 #include <stdio.h>
 
+bool compareChar(char &c1, char &c2) {
+  if (c1 == c2)
+    return true;
+  else if (std::toupper(c1) == std::toupper(c2))
+    return true;
+  return false;
+}
+
+bool caseInSensitiveCompare(std::string &str1, std::string &str2) {
+  return ((str1.size() == str2.size()) &&
+          std::equal(str1.begin(), str1.end(), str2.begin(), &compareChar));
+}
+
 namespace Network {
 
 std::vector<std::string> Optimizer_string = {"sgd", "adam"};
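A standalone check of the comparator pair added at the top of this hunk. std::equal and std::toupper come from <algorithm> and <cctype>; the hunk as shown does not add those includes, so they are presumably reached transitively — if not, they would need to be added:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

// Same definitions as in the diff above, reproduced so the sketch is
// self-contained (ASCII input assumed, as in the option tables).
bool compareChar(char &c1, char &c2) {
  if (c1 == c2)
    return true;
  else if (std::toupper(c1) == std::toupper(c2))
    return true;
  return false;
}

bool caseInSensitiveCompare(std::string &str1, std::string &str2) {
  return ((str1.size() == str2.size()) &&
          std::equal(str1.begin(), str1.end(), str2.begin(), &compareChar));
}

int main() {
  std::string sgd = "sgd", SGD = "SGD", adam = "adam";
  std::cout << caseInSensitiveCompare(sgd, SGD) << std::endl;  // 1: same word
  std::cout << caseInSensitiveCompare(sgd, adam) << std::endl; // 0: different
  return 0;
}

This is why "Optimizer = SGD" and "Optimizer = sgd" now parse to the same token in parseType().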
@@ -40,7 +53,7 @@ unsigned int parseType(std::string ll, input_type t) {
   switch (t) {
   case TOKEN_OPT:
     for (i = 0; i < Optimizer_string.size(); i++) {
-      if (Optimizer_string[i].compare(ll) == 0) {
+      if (caseInSensitiveCompare(Optimizer_string[i], ll)) {
         return (i);
       }
     }
@@ -48,7 +61,7 @@ unsigned int parseType(std::string ll, input_type t) {
     break;
   case TOKEN_COST:
     for (i = 0; i < Cost_string.size(); i++) {
-      if (Cost_string[i].compare(ll) == 0) {
+      if (caseInSensitiveCompare(Cost_string[i], ll)) {
         return (i);
       }
     }
@@ -56,7 +69,7 @@ unsigned int parseType(std::string ll, input_type t) {
     break;
   case TOKEN_NET:
     for (i = 0; i < NetworkType_string.size(); i++) {
-      if (NetworkType_string[i].compare(ll) == 0) {
+      if (caseInSensitiveCompare(NetworkType_string[i], ll)) {
         return (i);
       }
     }
@@ -64,7 +77,7 @@ unsigned int parseType(std::string ll, input_type t) {
     break;
   case TOKEN_ACTI:
     for (i = 0; i < activation_string.size(); i++) {
-      if (activation_string[i].compare(ll) == 0) {
+      if (caseInSensitiveCompare(activation_string[i], ll)) {
         return (i);
       }
     }
@@ -72,7 +85,7 @@ unsigned int parseType(std::string ll, input_type t) {
     break;
   case TOKEN_LAYER:
     for (i = 0; i < layer_string.size(); i++) {
-      if (layer_string[i].compare(ll) == 0) {
+      if (caseInSensitiveCompare(layer_string[i], ll)) {
         return (i);
       }
     }
@@ -126,9 +139,6 @@ void NeuralNetwork::init() {
   for (unsigned int i = 0; i < layers_name.size(); i++)
     std::cout << layers_name[i] << std::endl;
 
-  // std::cout << learning_rate<< " " << epoch << " " << opt.type<< " " <<
-  // opt.activation<< " " << cost << " " << model << " " << batchsize<< " \n";
-
   loss = 100000.0;
 
   for (unsigned int i = 0; i < layers_name.size(); i++) {
@@ -139,8 +149,8 @@ void NeuralNetwork::init() {
     id = iniparser_getint(ini, (layers_name[i] + ":Id").c_str(), 0);
     b_zero = iniparser_getboolean(ini, (layers_name[i] + ":Bias_zero").c_str(),
                                   true);
-    std::cout << l_type << " " << t << " " << w << " " << b_zero << " " << id
-              << std::endl;
+    std::cout << l_type << " " << t << " " << w << " " << h << " " << b_zero
+              << " " << id << std::endl;
     switch (t) {
     case Layers::LAYER_IN: {
       Layers::InputLayer *inputlayer = new (Layers::InputLayer);
@@ -161,6 +171,7 @@ void NeuralNetwork::init() {
       outputlayer->setType(t);
       outputlayer->initialize(batchsize, h, w, id, b_zero);
       outputlayer->setOptimizer(opt);
+      outputlayer->setCost(cost);
       layers.push_back(outputlayer);
     } break;
     case Layers::LAYER_UNKNOWN: