diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst index e8d787d6d..b66f470ac 100644 --- a/doc/source/rllib-env.rst +++ b/doc/source/rllib-env.rst @@ -115,6 +115,8 @@ Vectorized RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv <https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/vector_env.py>`__ to implement ``vector_step()`` and ``vector_reset()``. +Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overheads, so this only helps if your env is very expensive to step. + Multi-Agent and Hierarchical ---------------------------- diff --git a/python/ray/rllib/env/vector_env.py b/python/ray/rllib/env/vector_env.py index 565097a40..e1de12375 100644 --- a/python/ray/rllib/env/vector_env.py +++ b/python/ray/rllib/env/vector_env.py @@ -3,6 +3,7 @@ from __future__ import division from __future__ import print_function import logging +import numpy as np import ray from ray.rllib.utils.annotations import override, PublicAPI @@ -111,9 +112,16 @@ class _VectorizedGymEnv(VectorEnv): def vector_step(self, actions): obs_batch, rew_batch, done_batch, info_batch = [], [], [], [] for i in range(self.num_envs): - obs, rew, done, info = self.envs[i].step(actions[i]) + obs, r, done, info = self.envs[i].step(actions[i]) + if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r): + raise ValueError( + "Reward should be finite scalar, got {} ({})".format( + r, type(r))) + if type(info) is not dict: + raise ValueError("Info should be a dict, got {} ({})".format( + info, type(info))) obs_batch.append(obs) - rew_batch.append(rew) + rew_batch.append(r) done_batch.append(done) 
info_batch.append(info) return obs_batch, rew_batch, done_batch, info_batch