Add tanh & minibatch mode

author jijoong.moon <jijoong.moon@samsung.com>

Fri, 15 Nov 2019 07:55:20 +0000 (16:55 +0900)

committer 문지중/On-Device Lab(SR)/Principal Engineer/삼성전자 <jijoong.moon@samsung.com>

Fri, 15 Nov 2019 08:09:22 +0000 (17:09 +0900)
author jijoong.moon <jijoong.moon@samsung.com>
Fri, 15 Nov 2019 07:55:20 +0000 (16:55 +0900)
committer 문지중/On-Device Lab(SR)/Principal Engineer/삼성전자 <jijoong.moon@samsung.com>
Fri, 15 Nov 2019 08:09:22 +0000 (17:09 +0900)
diff --git a/DeepQ/include/matrix.h b/DeepQ/include/matrix.h

index bf0d779..2ad9f9c 100644 (file)
--- a/DeepQ/include/matrix.h
+++ b/DeepQ/include/matrix.h
@@ -1,6 +1,7 @@
  #ifndef MATRIX_H
  #define MATRIX_H
  
+#include <cmath>
  #include <fstream>
  #include <iostream>
  #include <vector>
@@ -9,7 +10,9 @@ class Matrix {
  public:
    Matrix();
    Matrix(int height, int width);
+  Matrix(int batch, int height, int width);
    Matrix(std::vector<std::vector<double>> const &array);
+  Matrix(std::vector<std::vector<std::vector<double>>> const &array);
  
    Matrix multiply(double const &value);
  
@@ -19,7 +22,10 @@ public:
  
    Matrix dot(Matrix const &m) const;
    Matrix transpose() const;
-  double sum() const;
+  Matrix sum() const;
+  Matrix average() const;
+  Matrix softmax() const;
+  void setZero();
  
    std::vector<double> Mat2Vec();
  
@@ -29,6 +35,8 @@ public:
  
    int getWidth() { return width; };
    int getHeight() { return height; };
+  int getBatch() { return batch; };
+  void setValue(int batch, int i, int j, double value);
  
    Matrix &copy(Matrix const &from);
  
@@ -36,9 +44,11 @@ public:
    void read(std::ifstream &file);
  
  private:
-  std::vector<std::vector<double>> array;
+  std::vector<std::vector<std::vector<double>>> array;
    int height;
    int width;
+  int batch;
+  int dim;
  };
  
  std::ostream &operator<<(std::ostream &flux, Matrix const &m);
diff --git a/DeepQ/include/neuralnet.h b/DeepQ/include/neuralnet.h

index fe628c9..838e2b0 100644 (file)
--- a/DeepQ/include/neuralnet.h
+++ b/DeepQ/include/neuralnet.h
@@ -2,6 +2,8 @@
  #define __NEURALNET_H__
  
  #include "matrix.h"
+#include <fstream>
+#include <iostream>
  #include <vector>
  
  namespace Network {
@@ -14,21 +16,27 @@ public:
    double getLoss();
    void setLoss(double l);
  
-  void init(int input, int hidden, int output, double rate);
+  void init(int input, int hidden, int output, int batch, double rate,
+            std::string acti, bool init_zero);
+  Matrix forwarding(Matrix input);
    Matrix forwarding(std::vector<double> input);
-  void backwarding(std::vector<double> expectedOutput);
+  void backwarding(Matrix input, Matrix expectedOutput);
    void saveModel(std::string model_path);
-  void readModel(std::string model_path);  
-  NeuralNetwork &copy(NeuralNetwork  &from);
+  void readModel(std::string model_path);
+  NeuralNetwork &copy(NeuralNetwork &from);
  
  private:
-  Matrix X, W1, W2, W3, Y, Y2, B1, B2, B3, H1, H2, dJdB1, dJdB2, dJdB3, dJdW1, dJdW2, dJdW3;
+  Matrix W1, W2, W3, B1, B2, B3, H1, H2;
  
    int inputNeuron;
    int outputNeuron;
    int hiddenNeuron;
+  int batchsize;
+  double (*activation)(double);
+  double (*activationPrime)(double);
    double learning_rate;
    double loss;
+  bool init_zero;
  };
  }
  
diff --git a/DeepQ/main.cpp b/DeepQ/main.cpp

index 5e7781c..563ef15 100644 (file)
--- a/DeepQ/main.cpp
+++ b/DeepQ/main.cpp
@@ -6,14 +6,16 @@
  #include <iterator>
  #include <queue>
  #include <stdio.h>
+#include <unistd.h>
  
  #define MAX_EPISODS 10000
-#define HIDDEN_LAYER_SIZE 40
+#define HIDDEN_LAYER_SIZE 50
  #define RENDER true
  #define REPLAY_MEMORY 50000
-#define MINI_BATCH 10
+#define MINI_BATCH 30
  #define DISCOUNT 0.9
  #define TRAINING true
+#define LEARNIG_RATE 0.05
  
  typedef struct {
    Gym::State state;
@@ -41,7 +43,6 @@ static int rangeRandom(int min, int max) {
  static std::vector<Experience> getMiniBatch(std::deque<Experience> Q) {
    int max = (MINI_BATCH > Q.size()) ? MINI_BATCH : Q.size();
    int min = (MINI_BATCH < Q.size()) ? MINI_BATCH : Q.size();
-  // std::cout << "MINI_BATCH : "<< MINI_BATCH <<" q.size : " <<Q.size()<<"\n";
  
    bool duplicate[max];
    std::vector<int> mem;
@@ -71,8 +72,8 @@ static int argmax(std::vector<double> vec) {
    int ret = 0;
    double val = 0.0;
    for (unsigned int i = 0; i < vec.size(); i++) {
-    if (val < vec[i]) {
-      val = vec[i];
+    if (val < vec[i] * 100000) {
+      val = vec[i] * 100000;
        ret = i;
      }
    }
@@ -155,8 +156,10 @@ int main(int argc, char **argv) {
    Network::NeuralNetwork mainNet;
    Network::NeuralNetwork targetNet;
  
-  mainNet.init(input_size, HIDDEN_LAYER_SIZE, output_size, 0.9);
-  targetNet.init(input_size, HIDDEN_LAYER_SIZE, output_size, 0.9);
+  mainNet.init(input_size, HIDDEN_LAYER_SIZE, output_size, MINI_BATCH,
+               LEARNIG_RATE, "tanh", true);
+  targetNet.init(input_size, HIDDEN_LAYER_SIZE, output_size, MINI_BATCH,
+                 LEARNIG_RATE, "tanh", true);
  
    if (is_file_exist(model_path)) {
      mainNet.readModel(model_path);
@@ -164,7 +167,6 @@ int main(int argc, char **argv) {
    }
  
    targetNet.copy(mainNet);
-  // writeFile << "init loss " << mainNet.getLoss() << "\n";
  
    for (int episode = 0; episode < MAX_EPISODS; episode++) {
      float epsilon = 1. / ((episode / 10) + 1);
@@ -182,11 +184,10 @@ int main(int argc, char **argv) {
        if (r < epsilon) {
          action_space = env->action_space();
          action = action_space->sample();
-        // std::cout <<" epsilon : r "<< epsilon << " : "<<r  <<"\n";
          std::cout << "test result random action : " << action[0] << "\n";
        } else {
          std::vector<double> input(s.observation.begin(), s.observation.end());
-        Matrix test = mainNet.forwarding(input);
+        Matrix test = mainNet.forwarding(Matrix({input}));
          std::vector<double> temp = test.Mat2Vec();
          action.push_back(argmax(temp));
  
@@ -210,8 +211,10 @@ int main(int argc, char **argv) {
        done = next_s.done;
        if (done) {
          std::cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! DONE : Episode "
-                  << episode << " \n";
-        ex.reward = -100;
+                  << episode << " Iteration : " << step_count << "\n";
+        ex.reward = -100.0;
+        if (!TRAINING)
+          break;
        }
  
        expQ.push_back(ex);
@@ -219,50 +222,65 @@ int main(int argc, char **argv) {
        s = next_s;
        step_count++;
  
-      if (step_count > 10000)
+      if (step_count > 10000) {
+        std::cout << "step_count is over 10000\n";
          break;
+      }
      }
      if (step_count > 10000)
        break;
  
+    if (!TRAINING && done)
+      break;
+
      if (episode % 10 == 1 && TRAINING) {
        for (int iter = 0; iter < 50; iter++) {
          std::vector<Experience> in_Exp = getMiniBatch(expQ);
+        std::vector<std::vector<std::vector<double>>> inbatch;
+        std::vector<std::vector<std::vector<double>>> next_inbatch;
+
          for (unsigned int i = 0; i < in_Exp.size(); i++) {
            Gym::State state = in_Exp[i].state;
            Gym::State next_state = in_Exp[i].next_state;
-
            std::vector<double> in(state.observation.begin(),
                                   state.observation.end());
-          Matrix Q = mainNet.forwarding(in);
-          std::vector<double> qa = Q.Mat2Vec();
+          inbatch.push_back({in});
+
            std::vector<double> next_in(next_state.observation.begin(),
                                        next_state.observation.end());
-          Matrix NQ = targetNet.forwarding(next_in);
-          std::vector<double> nqa = NQ.Mat2Vec();
-          double next = (nqa[0] > nqa[1]) ? nqa[0] : nqa[1];
+          next_inbatch.push_back({next_in});
+        }
  
+        Matrix Q = mainNet.forwarding(Matrix(inbatch));
+
+        Matrix NQ = targetNet.forwarding(Matrix(next_inbatch));
+        std::vector<double> nqa = NQ.Mat2Vec();
+
+        for (unsigned int i = 0; i < in_Exp.size(); i++) {
            if (in_Exp[i].done) {
-            qa[in_Exp[i].action[0]] = (double)in_Exp[i].reward;
+            Q.setValue(i, 0, (int)in_Exp[i].action[0],
+                       (double)in_Exp[i].reward);
            } else {
-            qa[in_Exp[i].action[0]] =
-                (double)(in_Exp[i].reward + DISCOUNT * next);
+            double next = (nqa[i * NQ.getWidth()] > nqa[i * NQ.getWidth() + 1])
+                              ? nqa[i * NQ.getWidth()]
+                              : nqa[i * NQ.getWidth() + 1];
+            Q.setValue(i, 0, (int)in_Exp[i].action[0],
+                       (double)in_Exp[i].reward + DISCOUNT * next);
            }
-
-          std::vector<double> _in(qa.begin(), qa.end());
-          mainNet.backwarding(_in);
          }
+        mainNet.backwarding(Matrix(inbatch), Q);
        }
  
-      writeFile << "===================== Loss : " << mainNet.getLoss()
-                << " mainNet\n";
-      std::cout << "\n\n===================== Loss : " << mainNet.getLoss()
-                << " mainNet\n";
-
+      writeFile << "=== mainNet Loss : " << mainNet.getLoss()
+                << " : targetNet Loss : " << targetNet.getLoss() << "\n";
+      std::cout << "=== mainNet Loss : " << mainNet.getLoss()
+                << " : targetNet Loss : " << targetNet.getLoss() << "\n";
        targetNet.copy(mainNet);
        mainNet.saveModel(model_path);
+      sleep(1);
      }
    }
+
    writeFile.close();
    return 0;
  }
diff --git a/DeepQ/matrix.cpp b/DeepQ/matrix.cpp

index 6e2d6a7..5dc9cb9 100644 (file)
--- a/DeepQ/matrix.cpp
+++ b/DeepQ/matrix.cpp
@@ -8,24 +8,50 @@ Matrix::Matrix() {}
  Matrix::Matrix(int height, int width) {
    this->height = height;
    this->width = width;
-  this->array =
-      std::vector<std::vector<double>>(height, std::vector<double>(width));
+  this->batch = 1;
+  this->dim = 2;
+  this->array.push_back(
+      std::vector<std::vector<double>>(height, std::vector<double>(width)));
+}
+
+Matrix::Matrix(int batch, int height, int width) {
+  this->height = height;
+  this->width = width;
+  this->batch = batch;
+  this->dim = 3;
+  for (int i = 0; i < batch; i++) {
+    this->array.push_back(
+        std::vector<std::vector<double>>(height, std::vector<double>(width)));
+  }
  }
  
  Matrix::Matrix(std::vector<std::vector<double>> const &array) {
    assert(array.size() != 0);
    this->height = array.size();
    this->width = array[0].size();
+  this->batch = 1;
+  this->dim = 2;
+  this->array.push_back(array);
+}
+
+Matrix::Matrix(std::vector<std::vector<std::vector<double>>> const &array) {
+  assert(array.size() != 0 && array[0].size() != 0);
+  this->batch = array.size();
+  this->height = array[0].size();
+  this->width = array[0][0].size();
+  this->dim = 3;
    this->array = array;
  }
  
  Matrix Matrix::multiply(double const &value) {
-  Matrix result(height, width);
-  int i, j;
+  Matrix result(batch, height, width);
+  int i, j, k;
  
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      result.array[i][j] = array[i][j] * value;
+  for (k = 0; k < batch; k++) {
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j++) {
+        result.array[k][i][j] = array[k][i][j] * value;
+      }
      }
    }
  
@@ -33,15 +59,25 @@ Matrix Matrix::multiply(double const &value) {
  }
  
  Matrix Matrix::add(Matrix const &m) const {
-  // std::cout << height << " " << m.height <<" " << width << " " <<
-  // m.width<<"\n";
    assert(height == m.height && width == m.width);
  
-  Matrix result(height, width);
-  int i, j;
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      result.array[i][j] = array[i][j] + m.array[i][j];
+  Matrix result(batch, height, width);
+  int i, j, k;
+  if (m.batch == 1) {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] + m.array[0][i][j];
+        }
+      }
+    }
+  } else {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] + m.array[k][i][j];
+        }
+      }
      }
    }
    return result;
@@ -49,58 +85,101 @@ Matrix Matrix::add(Matrix const &m) const {
  
  Matrix Matrix::subtract(Matrix const &m) const {
    assert(height == m.height && width == m.width);
-  Matrix result(height, width);
-  int i, j;
-
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      result.array[i][j] = array[i][j] - m.array[i][j];
+  Matrix result(batch, height, width);
+  int i, j, k;
+
+  if (m.batch == 1) {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] - m.array[0][i][j];
+        }
+      }
+    }
+  } else {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] - m.array[k][i][j];
+        }
+      }
      }
    }
-
    return result;
  }
  
  Matrix Matrix::multiply(Matrix const &m) const {
    assert(height == m.height && width == m.width);
-  Matrix result(height, width);
+  Matrix result(batch, height, width);
  
-  int i, j;
+  int i, j, k;
  
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      result.array[i][j] = array[i][j] * m.array[i][j];
+  if (m.batch == 1) {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] * m.array[0][i][j];
+        }
+      }
+    }
+  } else {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+          result.array[k][i][j] = array[k][i][j] * m.array[k][i][j];
+        }
+      }
      }
    }
  
    return result;
  }
  
-double Matrix::sum() const {
-  int i, j;
-  double ret = 0.0;
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      ret += array[i][j];
+Matrix Matrix::sum() const {
+  int i, j, k;
+  Matrix ret(batch, 1, 1);
+
+  for (k = 0; k < batch; k++) {
+    ret.array[k][0][0] = 0.0;
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j++) {
+        ret.array[k][0][0] += array[k][i][j];
+      }
      }
    }
+
    return ret;
  }
  
  Matrix Matrix::dot(Matrix const &m) const {
    assert(width == m.height);
-  int i, j, h, mwidth = m.width;
+  int i, j, h, k, mwidth = m.width;
    double w = 0;
  
-  Matrix result(height, mwidth);
-
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < mwidth; j++) {
-      for (h = 0; h < width; h++) {
-        w += array[i][h] * m.array[h][j];
+  Matrix result(batch, height, mwidth);
+  if (m.batch == 1) {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < mwidth; j++) {
+          for (h = 0; h < width; h++) {
+            w += array[k][i][h] * m.array[0][h][j];
+          }
+          result.array[k][i][j] = w;
+          w = 0;
+        }
+      }
+    }
+  } else {
+    for (k = 0; k < batch; k++) {
+      for (i = 0; i < height; i++) {
+        for (j = 0; j < mwidth; j++) {
+          for (h = 0; h < width; h++) {
+            w += array[k][i][h] * m.array[k][h][j];
+          }
+          result.array[k][i][j] = w;
+          w = 0;
+        }
        }
-      result.array[i][j] = w;
-      w = 0;
      }
    }
  
@@ -108,23 +187,27 @@ Matrix Matrix::dot(Matrix const &m) const {
  }
  
  Matrix Matrix::transpose() const {
-  Matrix result(width, height);
-  int i, j;
-  for (i = 0; i < width; i++) {
-    for (j = 0; j < height; j++) {
-      result.array[i][j] = array[j][i];
+  Matrix result(batch, width, height);
+  int i, j, k;
+  for (k = 0; k < batch; k++) {
+    for (i = 0; i < width; i++) {
+      for (j = 0; j < height; j++) {
+        result.array[k][i][j] = array[k][j][i];
+      }
      }
    }
    return result;
  }
  
  Matrix Matrix::applyFunction(double (*function)(double)) const {
-  Matrix result(height, width);
-  int i, j;
+  Matrix result(batch, height, width);
+  int i, j, k;
  
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      result.array[i][j] = (*function)(array[i][j]);
+  for (k = 0; k < batch; k++) {
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j++) {
+        result.array[k][i][j] = (*function)(array[k][i][j]);
+      }
      }
    }
  
@@ -132,35 +215,41 @@ Matrix Matrix::applyFunction(double (*function)(double)) const {
  }
  
  void Matrix::print(std::ostream &flux) const {
-  int i, j;
-  int maxLength[width];
+  int i, j, k, l;
+  int maxLength[batch][width];
    std::stringstream ss;
  
-  for (i = 0; i < width; i++) {
-    maxLength[i] = 0;
+  for (k = 0; k < batch; k++) {
+    for (i = 0; i < width; i++) {
+      maxLength[k][i] = 0;
+    }
    }
  
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      ss << array[i][j];
-      if (maxLength[j] < (int)(ss.str().size())) {
-        maxLength[j] = ss.str().size();
+  for (k = 0; k < batch; k++) {
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j++) {
+        ss << array[k][i][j];
+        if (maxLength[k][j] < (int)(ss.str().size())) {
+          maxLength[k][j] = ss.str().size();
+        }
+        ss.str(std::string());
        }
-      ss.str(std::string());
      }
    }
  
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      flux << array[i][j];
-      ss << array[i][j];
+  for (l = 0; l < batch; l++) {
+    for (i = 0; i < height; i++) {
+      for (j = 0; j < width; j++) {
+        flux << array[l][i][j];
+        ss << array[l][i][j];
  
-      for (int k = 0; k < (int)(maxLength[j] - ss.str().size() + 1); k++) {
-        flux << " ";
+        for (int k = 0; k < (int)(maxLength[l][j] - ss.str().size() + 1); k++) {
+          flux << " ";
+        }
+        ss.str(std::string());
        }
-      ss.str(std::string());
+      flux << std::endl;
      }
-    flux << std::endl;
    }
  }
  
@@ -173,10 +262,12 @@ Matrix &Matrix::copy(const Matrix &from) {
    if (this != &from) {
      height = from.height;
      width = from.width;
-    printf("%d %d\n", height, width);
-    for (int i = 0; i < height; i++) {
-      for (int j = 0; j < width; j++) {
-        array[i][j] = from.array[i][j];
+    batch = from.batch;
+    for (int k = 0; k < batch; k++) {
+      for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+          array[k][i][j] = from.array[k][i][j];
+        }
        }
      }
    }
@@ -185,26 +276,86 @@ Matrix &Matrix::copy(const Matrix &from) {
  
  std::vector<double> Matrix::Mat2Vec() {
    std::vector<double> ret;
-  // printf(" -----  width : height %d:%d\n", width, height);
-  for (int i = 0; i < height; i++)
-    for (int j = 0; j < width; j++)
-      ret.push_back(array[i][j]);
+  for (int k = 0; k < batch; k++)
+    for (int i = 0; i < height; i++)
+      for (int j = 0; j < width; j++)
+        ret.push_back(array[k][i][j]);
  
    return ret;
  }
  
  void Matrix::save(std::ofstream &file) {
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      file.write((char *)&array[i][j], sizeof(double));
+  for (int k = 0; k < batch; k++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        file.write((char *)&array[k][i][j], sizeof(double));
+      }
      }
    }
  }
  
  void Matrix::read(std::ifstream &file) {
+  for (int k = 0; k < batch; k++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        file.read((char *)&array[k][i][j], sizeof(double));
+      }
+    }
+  }
+}
+
+Matrix Matrix::average() const {
+  if (batch == 1)
+    return *this;
+
+  Matrix result(1, height, width);
    for (int i = 0; i < height; i++) {
      for (int j = 0; j < width; j++) {
-      file.read((char *)&array[i][j], sizeof(double));
+      result.array[0][i][j] = 0.0;
+      for (int k = 0; k < batch; k++) {
+        result.array[0][i][j] += array[k][i][j];
+      }
+      result.array[0][i][j] = result.array[0][i][j] / (double)batch;
      }
    }
+  return result;
+}
+
+void Matrix::setZero() {
+  for (int k = 0; k < batch; k++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        this->array[k][i][j] = 0.0;
+      }
+    }
+  }
+}
+
+Matrix Matrix::softmax() const {
+  Matrix result(batch, height, width);
+  Matrix mother(batch, height, 1);
+
+  mother.setZero();
+
+  for (int k = 0; k < batch; k++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        mother.array[k][i][0] += exp(this->array[k][i][j]);
+      }
+    }
+  }
+
+  for (int k = 0; k < batch; k++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
+        result.array[k][i][j] =
+            exp(this->array[k][i][j]) / mother.array[k][i][0];
+      }
+    }
+  }
+  return result;
+}
+
+void Matrix::setValue(int batch, int height, int width, double value) {
+  this->array[batch][height][width] = value;
  }
diff --git a/DeepQ/neuralnet.cpp b/DeepQ/neuralnet.cpp

index 18de9d0..fd58f64 100644 (file)
--- a/DeepQ/neuralnet.cpp
+++ b/DeepQ/neuralnet.cpp
@@ -1,21 +1,34 @@
  #include "neuralnet.h"
+#include <assert.h>
  #include <cmath>
-#include <iostream>
-#include <fstream>
  #include <stdio.h>
  
+// double random(double x) {
+//   double min =-0.01;
+//   double max = 0.01;
+//   double r = (double)rand() / (double)RAND_MAX;
+//   return min + r * (max - min);
+// }
+
  double random(double x) { return (double)(rand() % 10000 + 1) / 10000 - 0.5; }
  
  double sigmoid(double x) { return 1 / (1 + exp(-x)); }
  
  double sigmoidePrime(double x) { return exp(-x) / (pow(1 + exp(-x), 2)); }
  
+double tanhPrime(double x) {
+  double th = tanh(x);
+  return 1.0 - th * th;
+}
+
  namespace Network {
-  void NeuralNetwork::init(int input, int hidden, int output, double rate) {
+void NeuralNetwork::init(int input, int hidden, int output, int batch,
+                         double rate, std::string acti, bool init_zero) {
    inputNeuron = input;
    hiddenNeuron = hidden;
    outputNeuron = output;
-  
+  batchsize = batch;
+
    learning_rate = rate;
    loss = 100000.0;
    W1 = Matrix(inputNeuron, hiddenNeuron);
@@ -23,89 +36,136 @@ namespace Network {
    W3 = Matrix(hiddenNeuron, outputNeuron);
    B1 = Matrix(1, hiddenNeuron);
    B2 = Matrix(1, hiddenNeuron);
-  B3 = Matrix(1, outputNeuron);  
+  B3 = Matrix(1, outputNeuron);
  
-  W1 = W1.applyFunction(random);
-  W2 = W2.applyFunction(random);
-  W3 = W3.applyFunction(random);  
-  B1 = B1.applyFunction(random);
-  B2 = B2.applyFunction(random);
-  B3 = B3.applyFunction(random);
+  if (init_zero) {
+    W1 = W1.applyFunction(random);
+    W2 = W2.applyFunction(random);
+    W3 = W3.applyFunction(random);
+    B1 = B1.multiply(0.0);
+    B2 = B2.multiply(0.0);
+    B3 = B3.multiply(0.0);
+  } else {
+    W1 = W1.applyFunction(random);
+    W2 = W2.applyFunction(random);
+    W3 = W3.applyFunction(random);
+    B1 = B1.applyFunction(random);
+    B2 = B2.applyFunction(random);
+    B3 = B3.applyFunction(random);
+  }
  
+  if (acti.compare("tanh") == 0) {
+    activation = tanh;
+    activationPrime = tanhPrime;
+  } else {
+    activation = sigmoid;
+    activationPrime = sigmoidePrime;
+  }
+  this->init_zero = init_zero;
  }
-  
+
  Matrix NeuralNetwork::forwarding(std::vector<double> input) {
-  X = Matrix({input});
-  H1 = X.dot(W1).add(B1).applyFunction(sigmoid);
-  H2 = H1.dot(W2).add(B2).applyFunction(sigmoid);
-  Y = H2.dot(W3).add(B3).applyFunction(sigmoid);
+  assert(batchsize == 1);
+  Matrix X = Matrix({input});
+  H1 = X.dot(W1).add(B1).applyFunction(activation);
+  H2 = H1.dot(W2).add(B2).applyFunction(activation);
+  Matrix Y = H2.dot(W3).add(B3).applyFunction(activation);
    return Y;
+  // return Y.softmax();
  }
  
-void NeuralNetwork::backwarding(std::vector<double> expectedOutput) {
-  Matrix Y2 = Matrix({expectedOutput});
-  double l = sqrt((Y2.subtract(Y)).multiply(Y2.subtract(Y)).sum()) * 1.0 / 2.0;
-  if (l < loss)
-    loss = l;
-  dJdB3 =
-      Y.subtract(Y2).multiply(H2.dot(W3).add(B3).applyFunction(sigmoidePrime));
-  dJdB2 =
-    dJdB3.dot(W3.transpose()).multiply(H1.dot(W2).add(B2).applyFunction(sigmoidePrime));
-  dJdB1 =
-    dJdB2.dot(W2.transpose()).multiply(X.dot(W1).add(B1).applyFunction(sigmoidePrime));
-  dJdW3 = H2.transpose().dot(dJdB3);
-  dJdW2 = H1.transpose().dot(dJdB2);
-  dJdW1 = X.transpose().dot(dJdB1);
-
-  W1 = W1.subtract(dJdW1.multiply(learning_rate));
-  W2 = W2.subtract(dJdW2.multiply(learning_rate));
-  W3 = W3.subtract(dJdW3.multiply(learning_rate));
-  B1 = B1.subtract(dJdB1.multiply(learning_rate));
-  B2 = B2.subtract(dJdB2.multiply(learning_rate));
-  B3 = B3.subtract(dJdB3.multiply(learning_rate));  
+Matrix NeuralNetwork::forwarding(Matrix input) {
+  Matrix X = input;
+  H1 = X.dot(W1).add(B1).applyFunction(activation);
+  H2 = H1.dot(W2).add(B2).applyFunction(activation);
+  Matrix Y = H2.dot(W3).add(B3).applyFunction(activation);
+  return Y;
+  // return Y.softmax();
+}
+
+void NeuralNetwork::backwarding(Matrix input, Matrix expected_output) {
+  double lossSum = 0.0;
+  // Matrix Y2 = expected_output.softmax();
+  Matrix Y2 = expected_output;
+  Matrix X = input;
+  Matrix Y = forwarding(X);
+
+  Matrix sub = Y2.subtract(Y);
+  Matrix l = (sub.multiply(sub)).sum().multiply(0.5);
+
+  std::vector<double> t = l.Mat2Vec();
+  for (int i = 0; i < l.getBatch(); i++) {
+    lossSum += t[i];
+  }
+
+  loss = lossSum / (double)l.getBatch();
+
+  Matrix dJdB3 = Y.subtract(Y2).multiply(
+      H2.dot(W3).add(B3).applyFunction(activationPrime));
+  Matrix dJdB2 =
+      dJdB3.dot(W3.transpose())
+          .multiply(H1.dot(W2).add(B2).applyFunction(activationPrime));
+  Matrix dJdB1 =
+      dJdB2.dot(W2.transpose())
+          .multiply(X.dot(W1).add(B1).applyFunction(activationPrime));
+
+  Matrix dJdW3 = H2.transpose().dot(dJdB3);
+  Matrix dJdW2 = H1.transpose().dot(dJdB2);
+  Matrix dJdW1 = X.transpose().dot(dJdB1);
+
+  W1 = W1.subtract(dJdW1.average().multiply(learning_rate));
+  W2 = W2.subtract(dJdW2.average().multiply(learning_rate));
+  W3 = W3.subtract(dJdW3.average().multiply(learning_rate));
+
+  if (!init_zero) {
+    B1 = B1.subtract(dJdB1.average().multiply(learning_rate));
+    B2 = B2.subtract(dJdB2.average().multiply(learning_rate));
+    B3 = B3.subtract(dJdB3.average().multiply(learning_rate));
+  }
  }
  
  double NeuralNetwork::getLoss() { return loss; }
  void NeuralNetwork::setLoss(double l) { loss = l; }
  
-NeuralNetwork &NeuralNetwork::copy(NeuralNetwork  &from) {
+NeuralNetwork &NeuralNetwork::copy(NeuralNetwork &from) {
    if (this != &from) {
      inputNeuron = from.inputNeuron;
      outputNeuron = from.outputNeuron;
      hiddenNeuron = from.hiddenNeuron;
+    batchsize = from.batchsize;
      learning_rate = from.learning_rate;
      loss = from.loss;
  
      W1.copy(from.W1);
      W2.copy(from.W2);
      W3.copy(from.W3);
-    
+
      B1.copy(from.B1);
      B2.copy(from.B2);
-    B3.copy(from.B3);    
+    B3.copy(from.B3);
    }
    return *this;
  }
  
  void NeuralNetwork::saveModel(std::string model_path) {
    std::ofstream modelFile(model_path, std::ios::out | std::ios::binary);
-    W1.save(modelFile);
-    W2.save(modelFile);
-    W3.save(modelFile);
-    B1.save(modelFile);
-    B2.save(modelFile);
-    B3.save(modelFile);
-    modelFile.close();
+  W1.save(modelFile);
+  W2.save(modelFile);
+  W3.save(modelFile);
+  B1.save(modelFile);
+  B2.save(modelFile);
+  B3.save(modelFile);
+  modelFile.close();
  }
  
  void NeuralNetwork::readModel(std::string model_path) {
    std::ifstream modelFile(model_path, std::ios::in | std::ios::binary);
-    W1.read(modelFile);
-    W2.read(modelFile);
-    W3.read(modelFile);
-    B1.read(modelFile);
-    B2.read(modelFile);
-    B3.read(modelFile);
-    modelFile.close();
-}  
+  W1.read(modelFile);
+  W2.read(modelFile);
+  W3.read(modelFile);
+  B1.read(modelFile);
+  B2.read(modelFile);
+  B3.read(modelFile);
+  modelFile.close();
+}
  }
author	jijoong.moon <jijoong.moon@samsung.com>
	Fri, 15 Nov 2019 07:55:20 +0000 (16:55 +0900)
committer	문지중/On-Device Lab(SR)/Principal Engineer/삼성전자 <jijoong.moon@samsung.com>
	Fri, 15 Nov 2019 08:09:22 +0000 (17:09 +0900)
DeepQ/include/matrix.h		patch \| blob \| history
DeepQ/include/neuralnet.h		patch \| blob \| history
DeepQ/main.cpp		patch \| blob \| history
DeepQ/matrix.cpp		patch \| blob \| history
DeepQ/neuralnet.cpp		patch \| blob \| history