Add Custom Environment Cartpole
authorjijoong.moon <jijoong.moon@samsung.com>
Tue, 19 Nov 2019 00:11:20 +0000 (09:11 +0900)
committer문지중/On-Device Lab(SR)/Principal Engineer/삼성전자 <jijoong.moon@samsung.com>
Tue, 19 Nov 2019 00:24:37 +0000 (09:24 +0900)
Depending the USING_CUSTOM_ENV definition, can choose custom
env (Env::CartPole) or Gym::Environment.

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
DeepQ/CMakeLists.txt
DeepQ/main.cpp

index e76dbce..3b5b8cd 100644 (file)
@@ -3,6 +3,9 @@ project(DeepQ)
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -g")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -g -std=c++11")
+
+ADD_DEFINITIONS(-DUSING_CUSTOM_ENV)
+
 find_package(PkgConfig REQUIRED)
 
 pkg_check_modules(JSONCPP jsoncpp)
@@ -11,20 +14,18 @@ link_libraries(${JSONCPP_LIBRARIES})
 pkg_check_modules(CURL libcurl)
 link_libraries(${CURL_LIBRARIES})
 
-pkg_check_modules(PYTHON3 python3)
-link_libraries(${PYTHON3_LIBRARIES})
-
-
+set(ENVDIR ../Environment)
 include_directories( ${include_directories}
-                    ${CMAKE_CURRENT_SOURCE_DIR}/include
-                    ${PYTHON3_INCLUDE_DIRS}
+                     ${CMAKE_CURRENT_SOURCE_DIR}/include
                     ${JSONCPP_INCLUDE_DIRS}
+                    ${ENVDIR}
                     )
 set(SRCS
        main.cpp
        gym_binding.cpp
        neuralnet.cpp
        matrix.cpp
+       ${ENVDIR}/CartPole/cartpole.cpp
        )
 
 add_executable(DeepQ ${SRCS})
index df53860..2813f60 100644 (file)
@@ -8,6 +8,15 @@
 #include <stdio.h>
 #include <unistd.h>
 
+#ifdef USING_CUSTOM_ENV
+#include "CartPole/cartpole.h"
+#define STATE Env::State
+#define ENV Env::CartPole
+#else
+#define STATE Gym::State
+#define ENV Gym::Environment
+#endif
+
 #define MAX_EPISODS 50000
 #define HIDDEN_LAYER_SIZE 50
 #define RENDER true
 #define LEARNIG_RATE 0.001
 
 typedef struct {
-  Gym::State state;
+  STATE state;
   std::vector<float> action;
   float reward;
-  Gym::State next_state;
+  STATE next_state;
   bool done;
 } Experience;
 
@@ -80,45 +89,33 @@ static int argmax(std::vector<double> vec) {
   return ret;
 }
 
-#if 0
-static void run_single_environment(const boost::shared_ptr<Gym::Client> &client,
-                                   const std::string &env_id,
-                                   int episodes_to_run) {
-  boost::shared_ptr<Gym::Environment> env = client->make(env_id);
-  boost::shared_ptr<Gym::Space> action_space = env->action_space();
-  boost::shared_ptr<Gym::Space> observation_space = env->observation_space();
+static boost::shared_ptr<ENV> init_environment(int &input_size,
+                                               int &output_size) {
 
-  for (int e = 0; e < episodes_to_run; ++e) {
-    printf("%s episode %i...\n", env_id.c_str(), e);
-    Gym::State s;
-    env->reset(&s);
-    float total_reward = 0;
-    int total_steps = 0;
-    while (1) {
-      std::vector<float> action = action_space->sample();
-      env->step(action, true, &s);
-      assert(s.observation.size() == observation_space->sample().size());
-      total_reward += s.reward;
-      total_steps += 1;
-      if (s.done)
-        break;
-    }
-    printf("%s episode %i finished in %i steps with reward %0.2f\n",
-           env_id.c_str(), e, total_steps, total_reward);
+#ifdef USING_CUSTOM_ENV
+
+  boost::shared_ptr<ENV> env(new ENV);
+  env->init();
+  input_size = env->getInputSize();
+  output_size = env->getOutputSize();
+#else
+  boost::shared_ptr<Gym::Client> client;
+  std::string env_id = "CartPole-v0";
+  try {
+    client = Gym::client_create("127.0.0.1", 5000);
+  } catch (const std::exception &e) {
+    fprintf(stderr, "ERROR: %s\n", e.what());
+    return NULL;
   }
-}
-#endif
 
-static boost::shared_ptr<Gym::Environment>
-init_environment(const boost::shared_ptr<Gym::Client> &client,
-                 const std::string &env_id, int &input_size, int &output_size) {
-  boost::shared_ptr<Gym::Environment> env = client->make(env_id);
+  boost::shared_ptr<ENV> env = client->make(env_id);
   boost::shared_ptr<Gym::Space> action_space = env->action_space();
   boost::shared_ptr<Gym::Space> observation_space = env->observation_space();
 
   input_size = observation_space->sample().size();
 
   output_size = action_space->discreet_n;
+#endif
 
   return env;
 }
@@ -138,20 +135,10 @@ int main(int argc, char **argv) {
   srand(time(NULL));
   std::deque<Experience> expQ;
 
-  boost::shared_ptr<Gym::Client> client;
-  boost::shared_ptr<Gym::Environment> env;
-  boost::shared_ptr<Gym::Space> action_space;
+  boost::shared_ptr<ENV> env;
 
   int input_size, output_size;
-  try {
-    // client = Gym::client_create("127.0.0.1", 5000);
-    client = Gym::client_create("10.113.112.176", 5000);
-  } catch (const std::exception &e) {
-    fprintf(stderr, "ERROR: %s\n", e.what());
-    return 1;
-  }
-
-  env = init_environment(client, "CartPole-v0", input_size, output_size);
+  env = init_environment(input_size, output_size);
   printf("input_size %d, output_size %d\n", input_size, output_size);
 
   Network::NeuralNetwork mainNet;
@@ -176,8 +163,8 @@ int main(int argc, char **argv) {
     float epsilon = 1. / ((episode / 10) + 1);
     bool done = false;
     int step_count = 0;
-    Gym::State s;
-    Gym::State next_s;
+    STATE s;
+    STATE next_s;
 
     env->reset(&s);
 
@@ -186,8 +173,12 @@ int main(int argc, char **argv) {
       double r = RandomDouble(0.0, 1.0);
 
       if (r < epsilon && TRAINING) {
-        action_space = env->action_space();
+#ifdef USING_CUSTOM_ENV
+        action = env->sample();
+#else
+        boost::shared_ptr<Gym::Space> action_space = env->action_space();
         action = action_space->sample();
+#endif
         std::cout << "test result random action : " << action[0] << "\n";
       } else {
         std::vector<double> input(s.observation.begin(), s.observation.end());
@@ -244,8 +235,8 @@ int main(int argc, char **argv) {
         std::vector<std::vector<std::vector<double>>> next_inbatch;
 
         for (unsigned int i = 0; i < in_Exp.size(); i++) {
-          Gym::State state = in_Exp[i].state;
-          Gym::State next_state = in_Exp[i].next_state;
+          STATE state = in_Exp[i].state;
+          STATE next_state = in_Exp[i].next_state;
           std::vector<double> in(state.observation.begin(),
                                  state.observation.end());
           inbatch.push_back({in});