mirror of
https://github.com/wassname/pytorch-soft-actor-critic.git
synced 2026-06-27 19:00:26 +08:00
Add Normalized Actions
This commit is contained in:
@@ -50,7 +50,7 @@ args = parser.parse_args()
|
||||
# Removing Normalized Actions.
|
||||
# Another way to use it = actions * env.action_space.high[0] -> (https://github.com/sfujim/TD3). This does the same thing.
|
||||
# (or add env._max_episode_steps to normalized_actions.py)
|
||||
env = gym.make(args.env_name)
|
||||
env = NormalizedActions(gym.make(args.env_name))
|
||||
torch.manual_seed(args.seed)
|
||||
np.random.seed(args.seed)
|
||||
env.seed(args.seed)
|
||||
@@ -114,23 +114,28 @@ for i_episode in itertools.count(1):
|
||||
print("Episode: {}, total numsteps: {}, episode steps: {}, reward: {}".format(i_episode, total_numsteps, episode_steps, round(episode_reward, 2)))
|
||||
|
||||
if i_episode % 10 == 0 and args.eval == True:
|
||||
state = env.reset()
|
||||
episode_reward = 0
|
||||
done = False
|
||||
while not done:
|
||||
action = agent.select_action(state, eval=True)
|
||||
avg_reward = 0.
|
||||
episodes = 10
|
||||
for _ in range(episodes):
|
||||
state = env.reset()
|
||||
episode_reward = 0
|
||||
done = False
|
||||
while not done:
|
||||
action = agent.select_action(state, eval=True)
|
||||
|
||||
next_state, reward, done, _ = env.step(action)
|
||||
episode_reward += reward
|
||||
next_state, reward, done, _ = env.step(action)
|
||||
episode_reward += reward
|
||||
|
||||
|
||||
state = next_state
|
||||
state = next_state
|
||||
avg_reward += episode_reward
|
||||
# Turn the summed evaluation returns into a mean over the `episodes` rollouts.
avg_reward /= episodes
|
||||
|
||||
|
||||
writer.add_scalar('reward/test', episode_reward, i_episode)
|
||||
|
||||
print("----------------------------------------")
|
||||
print("Test Episode: {}, reward: {}".format(i_episode, round(episode_reward, 2)))
|
||||
print("Test Episodes: {}, Avg. Reward: {}".format(i_episode, round(avg_reward, 2)))
|
||||
print("----------------------------------------")
|
||||
|
||||
env.close()
|
||||
|
||||
@@ -14,3 +14,6 @@ class NormalizedActions(gym.ActionWrapper):
|
||||
action /= (self.action_space.high - self.action_space.low)
|
||||
action = action * 2 - 1
|
||||
return action
|
||||
|
||||
def _max_episode_steps(self):
    """Return the episode step limit of the wrapped environment.

    The lookup is delegated to ``self.env`` rather than to ``self``:
    this method shadows the attribute name on the wrapper, so reading
    ``self._max_episode_steps`` would give back this bound method
    instead of the limit.

    NOTE(review): assumes the wrapped env exposes ``_max_episode_steps``
    (as gym's ``TimeLimit`` wrapper does) -- confirm for custom envs.
    """
    return self.env._max_episode_steps
|
||||
|
||||
Reference in New Issue
Block a user