+/**
+ * @file cartpole.cpp
+ * @date 04 December 2019
+ * @brief This is environment class for cartpole example
+ * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
#include "cartpole.h"
#include <stdlib.h>
#define M_PI 3.14159265358979323846
-static double RandomDouble(double min, double max) {
- return min + ((double)rand() / (RAND_MAX / (max - min)));
-}
+/**
+ * @brief Generate Random Double value between min and max
+ * @retval random value
+ */
+static double RandomDouble(double min, double max) { return min + ((double)rand() / (RAND_MAX / (max - min))); }
+/**
+ * @brief Generate Random integer 0 or 1
+ * @retval random value
+ */
static int random0to1() { return rand() % 2; }
namespace Env {
S.observation.push_back(0.0);
}
-void CartPole::step(const std::vector<float> &action, bool rendering,
- State *s) {
+void CartPole::step(const std::vector<float> &action, bool rendering, State *s) {
double x = S.observation[0];
double x_dot = S.observation[1];
double theta = S.observation[2];
double costheta = cos(theta);
double sintheta = sin(theta);
- double temp =
- (force + polemass_length * theta_dot * theta_dot * sintheta) / total_mass;
+ double temp = (force + polemass_length * theta_dot * theta_dot * sintheta) / total_mass;
double thetaacc =
- (gravity * sintheta - costheta * temp) /
- (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass));
+ (gravity * sintheta - costheta * temp) / (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass));
double xacc = temp - polemass_length * thetaacc * costheta / total_mass;
x = x + tau * x_dot;
s->observation.push_back(theta);
s->observation.push_back(theta_dot);
- S.done = (bool)(x < x_threshold * -1.0 || x > x_threshold ||
- theta < theta_threshold_radians * -1.0 ||
+ S.done = (bool)(x < x_threshold * -1.0 || x > x_threshold || theta < theta_threshold_radians * -1.0 ||
theta > theta_threshold_radians);
// theta > theta_threshold_radians || count >= 200);
count++;
+/**
+ * @file cartpole.h
+ * @date 04 December 2019
+ * @brief This is environment class for cartpole example
+ * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
#ifndef __CARTPOLE_H__
#define __CARTPOLE_H__
#include <iostream>
#include <vector>
+/**
+ * @namespace Env
+ * @brief Namespace Env
+ */
namespace Env {
-
+/**
+ * @brief State Data Type
+ * observation : state variables
+ * reward : reward
+ * done : boolean for end of episode
+ */
typedef struct {
std::vector<float> observation;
float reward;
std::string ginfo;
} State;
+/**
+ * @class CartPole
+ * @brief CartPole-v0 example for Reinforcement Learning
+ */
class CartPole {
-public:
+ public:
+ /**
+ * @brief Constructor of CartPole
+ */
CartPole(){};
+
+ /**
+ * @brief Destructor of CartPole
+ */
~CartPole(){};
+
+ /**
+ * @brief Initialization of CartPole variables
+ * Set hyper parameters & set observation zero
+ */
void init();
+
+ /**
+ * @brief Run Env with action
+ * @param[in] action input action
+ * @param[in] rendering boolean flag for rendering (currently unused)
+ * @param[out] s State output calculated by Env
+ */
void step(const std::vector<float> &action, bool rendering, State *s);
- void reset(State *initiali_s);
+
+ /**
+ * @brief reset Env
+ * @param[out] initial_s copy of initial State from this->S
+ */
+ void reset(State *initial_s);
+
+ /**
+ * @brief get InputSize : 4 (CartPole-v0 example)
+ * @retval inputsize
+ */
int getInputSize();
+
+ /**
+ * @brief get OutputSize : 2 (CartPole-v0 example)
+ * @retval outputSize
+ */
int getOutputSize();
+
+ /**
+ * @brief generate random action value
+ * @retval random action values as vector<float>
+ */
std::vector<float> sample();
-private:
+ private:
double gravity;
double masscart;
double masspole;
+/**
+ * @file main.cpp
+ * @date 04 December 2019
+ * @brief This is simple example to use Env CartPole-v0
+ * @see https://github.sec.samsung.net/jijoong-moon/Transfer-Learning.git
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
#include "cartpole.h"
#include <iostream>
#include <stdio.h>
Env::CartPole cartpole;
srand(time(NULL));
std::vector<float> action;
- action=cartpole.sample();
+ action = cartpole.sample();
cartpole.init();
for (int episode = 0; episode < 100; episode++) {
cartpole.reset(&state);
int total_steps = 0;
while (1) {
action = cartpole.sample();
- cartpole.step(action,false, &state);
+ cartpole.step(action, false, &state);
total_reward += state.reward;
total_steps += 1;
- printf("action : %f --> state : %f %f %f %f\n", action[0],
- state.observation[0], state.observation[1], state.observation[2],
- state.observation[3]);
+ printf("action : %f --> state : %f %f %f %f\n", action[0], state.observation[0], state.observation[1],
+ state.observation[2], state.observation[3]);
if (state.done)
break;
}
- printf("episode %i finished in %i steps with reward %02f\n", episode,
- total_steps, total_reward);
+ printf("episode %i finished in %i steps with reward %02f\n", episode, total_steps, total_reward);
}
}