Add Normalized Actions

This commit is contained in:
pranz24
2019-05-20 12:37:41 +05:30
parent 7ff1e2f4e4
commit 98b2cbfa7f
2 changed files with 18 additions and 10 deletions
+15 -10
View File
@@ -50,7 +50,7 @@ args = parser.parse_args()
# Wrap the environment in NormalizedActions.
# Another way to do it = actions * env.action_space.high[0] -> (https://github.com/sfujim/TD3). This does the same thing.
# (or add env._max_episode_steps to normalized_actions.py)
# Build the environment once, already wrapped, so no unwrapped instance is
# created and then discarded.
env = NormalizedActions(gym.make(args.env_name))
# Seed torch, numpy and the environment from the same --seed value.
torch.manual_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)
@@ -114,23 +114,28 @@ for i_episode in itertools.count(1):
print("Episode: {}, total numsteps: {}, episode steps: {}, reward: {}".format(i_episode, total_numsteps, episode_steps, round(episode_reward, 2)))
if i_episode % 10 == 0 and args.eval == True:
    # Periodic evaluation: on every 10th training episode, roll out the agent
    # with eval=True for a fixed number of episodes and report the mean
    # episode reward.
    avg_reward = 0.
    episodes = 10
    for _ in range(episodes):
        state = env.reset()
        episode_reward = 0
        done = False
        while not done:
            action = agent.select_action(state, eval=True)

            next_state, reward, done, _ = env.step(action)
            episode_reward += reward

            state = next_state
        avg_reward += episode_reward
    # The divisor was previously misspelled (`epsiodes`), which raised NameError.
    avg_reward /= episodes

    # Log the mean over the evaluation episodes, not just the last episode.
    writer.add_scalar('reward/test', avg_reward, i_episode)

    print("----------------------------------------")
    print("Test Episodes: {}, Avg. Reward: {}".format(i_episode, round(avg_reward, 2)))
    print("----------------------------------------")
env.close()
+3
View File
@@ -14,3 +14,6 @@ class NormalizedActions(gym.ActionWrapper):
action /= (self.action_space.high - self.action_space.low)
action = action * 2 - 1
return action
def _max_episode_steps(self):
    """Return the episode step limit of the wrapped environment.

    The value is read from ``self.env`` rather than from ``self``: looking
    up ``self._max_episode_steps`` would resolve to this very method, and
    the previous body referenced a misspelled attribute
    (``_max_epsidoe_steps``) that raised ``AttributeError``.

    NOTE(review): this is a plain method, so callers must invoke it
    (``env._max_episode_steps()``); confirm call sites do not treat it as
    an attribute.
    """
    return self.env._max_episode_steps