[rllib] Improve error message for bad envs, add remote env docs (#4044)

* commit * fix up rew
2026-07-04 10:29:45 +08:00 · 2019-02-18 01:28:19 -08:00
parent b78d77257b
commit f8bef004da
2 changed files with 12 additions and 2 deletions
@@ -115,6 +115,8 @@ Vectorized

 RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv <https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/vector_env.py>`__ to implement ``vector_step()`` and ``vector_reset()``.

+Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overhead, so this only helps if your env is very expensive to step.
+
 Multi-Agent and Hierarchical
 ----------------------------

@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 import logging
+import numpy as np

 import ray
 from ray.rllib.utils.annotations import override, PublicAPI
@@ -111,9 +112,16 @@ class _VectorizedGymEnv(VectorEnv):
    def vector_step(self, actions):
        obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
        for i in range(self.num_envs):
-            obs, rew, done, info = self.envs[i].step(actions[i])
+            obs, r, done, info = self.envs[i].step(actions[i])
+            if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r):
+                raise ValueError(
+                    "Reward should be finite scalar, got {} ({})".format(
+                        r, type(r)))
+            if type(info) is not dict:
+                raise ValueError("Info should be a dict, got {} ({})".format(
+                    info, type(info)))
            obs_batch.append(obs)
-            rew_batch.append(rew)
+            rew_batch.append(r)
            done_batch.append(done)
            info_batch.append(info)
        return obs_batch, rew_batch, done_batch, info_batch