mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 15:00:17 +08:00
[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import math
|
||||
from math import cos
|
||||
from gym.spaces import Box, Tuple, Discrete
|
||||
import numpy as np
|
||||
from gym.envs.classic_control.mountain_car import MountainCarEnv
|
||||
@@ -23,17 +23,17 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
|
||||
|
||||
self.action_space = [Discrete(3) for _ in range(2)]
|
||||
self.observation_space = Tuple([
|
||||
Box(self.low, self.high) for _ in range(2)])
|
||||
Box(self.low, self.high, dtype=np.float32) for _ in range(2)])
|
||||
|
||||
self._seed()
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
def _step(self, action):
|
||||
def step(self, action):
|
||||
summed_act = 0.5 * np.sum(action)
|
||||
|
||||
position, velocity = self.state
|
||||
velocity += (summed_act - 1) * 0.001
|
||||
velocity += math.cos(3 * position) * (-0.0025)
|
||||
velocity += cos(3 * position) * (-0.0025)
|
||||
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
|
||||
position += velocity
|
||||
position = np.clip(position, self.min_position, self.max_position)
|
||||
@@ -47,6 +47,6 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
|
||||
self.state = (position, velocity)
|
||||
return [np.array(self.state) for _ in range(2)], reward, done, {}
|
||||
|
||||
def _reset(self):
|
||||
def reset(self):
|
||||
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
|
||||
return [np.array(self.state) for _ in range(2)]
|
||||
|
||||
@@ -22,18 +22,20 @@ class MultiAgentPendulumEnv(PendulumEnv):
|
||||
|
||||
high = np.array([1., 1., self.max_speed])
|
||||
self.action_space = [Box(low=-self.max_torque / 2,
|
||||
high=self.max_torque / 2, shape=(1,))
|
||||
high=self.max_torque / 2,
|
||||
shape=(1,),
|
||||
dtype=np.float32)
|
||||
for _ in range(2)]
|
||||
self.observation_space = Tuple([
|
||||
Box(low=-high, high=high) for _ in range(2)])
|
||||
Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])
|
||||
|
||||
self._seed()
|
||||
self.seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _step(self, u):
|
||||
def step(self, u):
|
||||
th, thdot = self.state # th := theta
|
||||
|
||||
summed_u = np.sum(u)
|
||||
@@ -55,7 +57,7 @@ class MultiAgentPendulumEnv(PendulumEnv):
|
||||
self.state = np.array([newth, newthdot])
|
||||
return self._get_obs(), -costs, False, {}
|
||||
|
||||
def _reset(self):
|
||||
def reset(self):
|
||||
high = np.array([np.pi, 1])
|
||||
self.state = self.np_random.uniform(low=-high, high=high)
|
||||
self.last_u = None
|
||||
|
||||
Reference in New Issue
Block a user