From e5f8dac0a9988051582fb2c44b59761988b8e714 Mon Sep 17 00:00:00 2001 From: "jijoong.moon" Date: Wed, 4 Dec 2019 19:16:53 +0900 Subject: [PATCH] Add Doxygen Documentation for Environment Add Doxygen Doc. for Environment **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: jijoong.moon --- Environment/CartPole/cartpole.cpp | 33 ++++++++++++------- Environment/CartPole/cartpole.h | 69 ++++++++++++++++++++++++++++++++++++--- Environment/CartPole/main.cpp | 22 +++++++++---- 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/Environment/CartPole/cartpole.cpp b/Environment/CartPole/cartpole.cpp index 0f31d09..2ef1d8c 100644 --- a/Environment/CartPole/cartpole.cpp +++ b/Environment/CartPole/cartpole.cpp @@ -1,12 +1,27 @@ +/** + * @file cartpole.cpp + * @date 04 December 2019 + * @brief This is environment class for cartpole example + * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git + * @author Jijoong Moon + * @bug No known bugs except for NYI items + * + */ #include "cartpole.h" #include #define M_PI 3.14159265358979323846 -static double RandomDouble(double min, double max) { - return min + ((double)rand() / (RAND_MAX / (max - min))); -} +/** + * @brief Generate Random Double value between min to max + * @retval random value + */ +static double RandomDouble(double min, double max) { return min + ((double)rand() / (RAND_MAX / (max - min))); } +/** + * @brief Generate Random integer 0 or 1 + * @retval random value + */ static int random0to1() { return rand() % 2; } namespace Env { @@ -30,8 +45,7 @@ void CartPole::init() { S.observation.push_back(0.0); } -void CartPole::step(const std::vector &action, bool rendering, - State *s) { +void CartPole::step(const std::vector &action, bool rendering, State *s) { double x = S.observation[0]; double x_dot = S.observation[1]; double theta = S.observation[2]; @@ -40,11 +54,9 @@ void CartPole::step(const std::vector &action, 
bool rendering, double costheta = cos(theta); double sintheta = sin(theta); - double temp = - (force + polemass_length * theta_dot * theta_dot * sintheta) / total_mass; + double temp = (force + polemass_length * theta_dot * theta_dot * sintheta) / total_mass; double thetaacc = - (gravity * sintheta - costheta * temp) / - (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass)); + (gravity * sintheta - costheta * temp) / (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass)); double xacc = temp - polemass_length * thetaacc * costheta / total_mass; x = x + tau * x_dot; @@ -62,8 +74,7 @@ void CartPole::step(const std::vector &action, bool rendering, s->observation.push_back(theta); s->observation.push_back(theta_dot); - S.done = (bool)(x < x_threshold * -1.0 || x > x_threshold || - theta < theta_threshold_radians * -1.0 || + S.done = (bool)(x < x_threshold * -1.0 || x > x_threshold || theta < theta_threshold_radians * -1.0 || theta > theta_threshold_radians); // theta > theta_threshold_radians || count >= 200); count++; diff --git a/Environment/CartPole/cartpole.h b/Environment/CartPole/cartpole.h index ed7581b..16cf7b9 100644 --- a/Environment/CartPole/cartpole.h +++ b/Environment/CartPole/cartpole.h @@ -1,3 +1,12 @@ +/** + * @file cartpole.h + * @date 04 December 2019 + * @brief This is environment class for cartpole example + * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git + * @author Jijoong Moon + * @bug No known bugs except for NYI items + * + */ #ifndef __CARTPOLE_H__ #define __CARTPOLE_H__ @@ -5,8 +14,17 @@ #include #include +/** + * @namespace Env + * @brief Namespace Env + */ namespace Env { - +/** + * @brief State Data Type + * observation : state variables + * reward : reward + * done : boolean for end of episode + */ typedef struct { std::vector observation; float reward; @@ -14,18 +32,61 @@ typedef struct { std::string ginfo; } State; +/** + * @class CartPole Class + * @brief CartPole-v0
example for Reinforcement Learning + */ class CartPole { -public: + public: + /** + * @brief Constructor of CartPole + */ CartPole(){}; + + /** + * @brief Destructor of CartPole + */ ~CartPole(){}; + + /** + * @brief Initialization of CartPole variables + * Set hyper parameters & set observation zero + */ void init(); + + /** + * @brief Run Env with action + * @param[in] action input action + * @param[in] rendering boolean variable for rendering. (It is not used) + * @param[out] s State Output calculated by Env + */ void step(const std::vector &action, bool rendering, State *s); - void reset(State *initiali_s); + + /** + * @brief reset Env + * @param[out] initial_s copy initialized State from this->S + */ + void reset(State *initial_s); + + /** + * @brief get InputSize : 4 (CartPole-v0 example) + * @retval inputsize + */ int getInputSize(); + + /** + * @brief get OutputSize : 2 (CartPole-v0 example) + * @retval outputSize + */ int getOutputSize(); + + /** + * @brief generate random action value + * @retval random action values as vector + */ std::vector sample(); -private: + private: double gravity; double masscart; double masspole; diff --git a/Environment/CartPole/main.cpp b/Environment/CartPole/main.cpp index f82e77e..706471f 100644 --- a/Environment/CartPole/main.cpp +++ b/Environment/CartPole/main.cpp @@ -1,3 +1,13 @@ +/** + * @file main.cpp + * @date 04 December 2019 + * @brief This is simple example to use Env CartPole-v0 + * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git + * @author Jijoong Moon + * @bug No known bugs except for NYI items + * + */ + #include "cartpole.h" #include #include @@ -7,7 +17,7 @@ int main() { Env::CartPole cartpole; srand(time(NULL)); std::vector action; - action=cartpole.sample(); + action = cartpole.sample(); cartpole.init(); for (int episode = 0; episode < 100; episode++) { cartpole.reset(&state); @@ -15,16 +25,14 @@ int main() { int total_steps = 0; while (1) { action = cartpole.sample(); -
cartpole.step(action,false, &state); + cartpole.step(action, false, &state); total_reward += state.reward; total_steps += 1; - printf("action : %f --> state : %f %f %f %f\n", action[0], - state.observation[0], state.observation[1], state.observation[2], - state.observation[3]); + printf("action : %f --> state : %f %f %f %f\n", action[0], state.observation[0], state.observation[1], + state.observation[2], state.observation[3]); if (state.done) break; } - printf("episode %i finished in %i steps with reward %02f\n", episode, - total_steps, total_reward); + printf("episode %i finished in %i steps with reward %02f\n", episode, total_steps, total_reward); } } -- 2.7.4