From 98b2cbfa7f15744b9bddf650e872392b1a2a08c6 Mon Sep 17 00:00:00 2001 From: pranz24 Date: Mon, 20 May 2019 12:37:41 +0530 Subject: [PATCH] Add Normalized Actions --- main.py | 25 +++++++++++++++---------- normalized_actions.py | 3 +++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/main.py b/main.py index 81ab134..da0781f 100644 --- a/main.py +++ b/main.py @@ -50,7 +50,7 @@ args = parser.parse_args() # Removing Normalized Actions. # Another way to use it = actions * env.action_space.high[0] -> (https://github.com/sfujim/TD3). This does the same thing. # (or add env._max_episode_steps to normalized_actions.py) -env = gym.make(args.env_name) +env = NormalizedActions(gym.make(args.env_name)) torch.manual_seed(args.seed) np.random.seed(args.seed) env.seed(args.seed) @@ -114,23 +114,28 @@ for i_episode in itertools.count(1): print("Episode: {}, total numsteps: {}, episode steps: {}, reward: {}".format(i_episode, total_numsteps, episode_steps, round(episode_reward, 2))) if i_episode % 10 == 0 and args.eval == True: - state = env.reset() - episode_reward = 0 - done = False - while not done: - action = agent.select_action(state, eval=True) + avg_reward = 0. + episodes = 10 + for _ in range(episodes): + state = env.reset() + episode_reward = 0 + done = False + while not done: + action = agent.select_action(state, eval=True) - next_state, reward, done, _ = env.step(action) - episode_reward += reward + next_state, reward, done, _ = env.step(action) + episode_reward += reward - state = next_state + state = next_state + avg_reward += episode_reward + avg_reward /= episodes writer.add_scalar('reward/test', episode_reward, i_episode) print("----------------------------------------") - print("Test Episode: {}, reward: {}".format(i_episode, round(episode_reward, 2))) + print("Test Episodes: {}, Avg. 
Reward: {}".format(i_episode, round(avg_reward, 2))) print("----------------------------------------") env.close() diff --git a/normalized_actions.py b/normalized_actions.py index a0e64c7..9498816 100644 --- a/normalized_actions.py +++ b/normalized_actions.py @@ -14,3 +14,6 @@ class NormalizedActions(gym.ActionWrapper): action /= (self.action_space.high - self.action_space.low) action = action * 2 - 1 return action + + def _max_episode_steps(self): + return self.env._max_episode_steps