From 98b2cbfa7f15744b9bddf650e872392b1a2a08c6 Mon Sep 17 00:00:00 2001
From: pranz24 <pranjal.tandon@gmail.com>
Date: Mon, 20 May 2019 12:37:41 +0530
Subject: [PATCH] Add Normalized Actions

---
 main.py               | 25 +++++++++++++++----------
 normalized_actions.py |  3 +++
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/main.py b/main.py
index 81ab134..da0781f 100644
--- a/main.py
+++ b/main.py
@@ -50,7 +50,7 @@ args = parser.parse_args()
 # Removing Normalized Actions. 
 # Another way to use it = actions * env.action_space.high[0] -> (https://github.com/sfujim/TD3). This does the same thing.
 # (or add env._max_episode_steps to normalized_actions.py)
-env = gym.make(args.env_name)  
+env = NormalizedActions(gym.make(args.env_name))
 torch.manual_seed(args.seed)
 np.random.seed(args.seed)
 env.seed(args.seed)
@@ -114,23 +114,28 @@ for i_episode in itertools.count(1):
     print("Episode: {}, total numsteps: {}, episode steps: {}, reward: {}".format(i_episode, total_numsteps, episode_steps, round(episode_reward, 2)))
 
     if i_episode % 10 == 0 and args.eval == True:
-        state = env.reset()
-        episode_reward = 0
-        done = False
-        while not done:
-            action = agent.select_action(state, eval=True)
+        avg_reward = 0.
+        episodes = 10
+        for _  in range(episodes):
+            state = env.reset()
+            episode_reward = 0
+            done = False
+            while not done:
+                action = agent.select_action(state, eval=True)
 
-            next_state, reward, done, _ = env.step(action)
-            episode_reward += reward
+                next_state, reward, done, _ = env.step(action)
+                episode_reward += reward
 
 
-            state = next_state
+                state = next_state
+            avg_reward += episode_reward
+        avg_reward /= episodes  # mean return over the evaluation rollouts above
 
 
         writer.add_scalar('reward/test', episode_reward, i_episode)
 
         print("----------------------------------------")
-        print("Test Episode: {}, reward: {}".format(i_episode, round(episode_reward, 2)))
+        print("Test Episodes: {}, Avg. Reward: {}".format(i_episode, round(avg_reward, 2)))
         print("----------------------------------------")
 
 env.close()
diff --git a/normalized_actions.py b/normalized_actions.py
index a0e64c7..9498816 100644
--- a/normalized_actions.py
+++ b/normalized_actions.py
@@ -14,3 +14,6 @@ class NormalizedActions(gym.ActionWrapper):
         action /= (self.action_space.high - self.action_space.low)
         action = action * 2 - 1
         return action
+
+    def _max_episode_steps(self):
+        return self.env._max_episode_steps  # forward to the wrapped env's attribute; `self.` would resolve to this method itself