mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 10:29:45 +08:00
[rllib] Improve error message for bad envs, add remote env docs (#4044)
* commit * fix up rew
This commit is contained in:
@@ -115,6 +115,8 @@ Vectorized
|
||||
|
||||
RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv <https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/vector_env.py>`__ to implement ``vector_step()`` and ``vector_reset()``.
|
||||
|
||||
Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overhead, so this only helps if your env is very expensive to step.
|
||||
|
||||
Multi-Agent and Hierarchical
|
||||
----------------------------
|
||||
|
||||
|
||||
Vendored
+10
-2
@@ -3,6 +3,7 @@ from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
|
||||
import ray
|
||||
from ray.rllib.utils.annotations import override, PublicAPI
|
||||
@@ -111,9 +112,16 @@ class _VectorizedGymEnv(VectorEnv):
|
||||
def vector_step(self, actions):
|
||||
obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
|
||||
for i in range(self.num_envs):
|
||||
obs, rew, done, info = self.envs[i].step(actions[i])
|
||||
obs, r, done, info = self.envs[i].step(actions[i])
|
||||
if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r):
|
||||
raise ValueError(
|
||||
"Reward should be finite scalar, got {} ({})".format(
|
||||
r, type(r)))
|
||||
if type(info) is not dict:
|
||||
raise ValueError("Info should be a dict, got {} ({})".format(
|
||||
info, type(info)))
|
||||
obs_batch.append(obs)
|
||||
rew_batch.append(rew)
|
||||
rew_batch.append(r)
|
||||
done_batch.append(done)
|
||||
info_batch.append(info)
|
||||
return obs_batch, rew_batch, done_batch, info_batch
|
||||
|
||||
Reference in New Issue
Block a user