[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601)

This commit is contained in:
butchcom
2018-03-06 08:31:02 +00:00
committed by Richard Liaw
parent 162d063f0d
commit 936bebef99
11 changed files with 63 additions and 49 deletions
@@ -1,4 +1,4 @@
import math
from math import cos
from gym.spaces import Box, Tuple, Discrete
import numpy as np
from gym.envs.classic_control.mountain_car import MountainCarEnv
@@ -23,17 +23,17 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
self.action_space = [Discrete(3) for _ in range(2)]
self.observation_space = Tuple([
Box(self.low, self.high) for _ in range(2)])
Box(self.low, self.high, dtype=np.float32) for _ in range(2)])
self._seed()
self.seed()
self.reset()
def _step(self, action):
def step(self, action):
summed_act = 0.5 * np.sum(action)
position, velocity = self.state
velocity += (summed_act - 1) * 0.001
velocity += math.cos(3 * position) * (-0.0025)
velocity += cos(3 * position) * (-0.0025)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
@@ -47,6 +47,6 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
self.state = (position, velocity)
return [np.array(self.state) for _ in range(2)], reward, done, {}
# NOTE(review): diff residue - the two header lines below appear to be the
# pre-change (`_reset`) and post-change (`reset`) spellings of the same method;
# only one of them belongs in the real file.
def _reset(self):
def reset(self):
# State is a (position, velocity) pair: position is drawn via
# self.np_random.uniform between -0.6 and -0.4, velocity starts at 0.
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
# Returns a list of two independent array copies of the same state
# (presumably one observation per agent - confirm against the caller).
return [np.array(self.state) for _ in range(2)]
@@ -22,18 +22,20 @@ class MultiAgentPendulumEnv(PendulumEnv):
high = np.array([1., 1., self.max_speed])
self.action_space = [Box(low=-self.max_torque / 2,
high=self.max_torque / 2, shape=(1,))
high=self.max_torque / 2,
shape=(1,),
dtype=np.float32)
for _ in range(2)]
self.observation_space = Tuple([
Box(low=-high, high=high) for _ in range(2)])
Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])
self._seed()
self.seed()
# NOTE(review): diff residue - the two header lines below appear to be the
# pre-change (`_seed`) and post-change (`seed`) spellings of the same method;
# only one of them belongs in the real file.
def _seed(self, seed=None):
def seed(self, seed=None):
# seeding.np_random(seed) yields a pair: the first item is stored as
# self.np_random, the second replaces the local `seed`.
self.np_random, seed = seeding.np_random(seed)
# The resulting seed is returned wrapped in a one-element list.
return [seed]
def _step(self, u):
def step(self, u):
th, thdot = self.state # th := theta
summed_u = np.sum(u)
@@ -55,7 +57,7 @@ class MultiAgentPendulumEnv(PendulumEnv):
self.state = np.array([newth, newthdot])
return self._get_obs(), -costs, False, {}
def _reset(self):
def reset(self):
high = np.array([np.pi, 1])
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None