diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/python/ray/rllib/evaluation/policy_evaluator.py index 0578c3417..d032c8da3 100644 --- a/python/ray/rllib/evaluation/policy_evaluator.py +++ b/python/ray/rllib/evaluation/policy_evaluator.py @@ -432,6 +432,8 @@ class PolicyEvaluator(EvaluatorInterface): info_out = {k: builder.get(v) for k, v in info_out.items()} else: for pid, batch in samples.policy_batches.items(): + if pid not in self.policies_to_train: + continue grad_out[pid], info_out[pid] = ( self.policy_map[pid].compute_gradients(batch)) else: @@ -472,6 +474,8 @@ class PolicyEvaluator(EvaluatorInterface): info_out = {k: builder.get(v) for k, v in info_out.items()} else: for pid, batch in samples.policy_batches.items(): + if pid not in self.policies_to_train: + continue info_out[pid], _ = ( self.policy_map[pid].compute_apply(batch)) return info_out diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index 4c0a20f77..2bb6d2823 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -105,6 +105,12 @@ class ModelCatalog(object): config = config or MODEL_DEFAULTS if isinstance(action_space, gym.spaces.Box): + if len(action_space.shape) > 1: + raise ValueError( + "Action space has multiple dimensions " + "{}. ".format(action_space.shape) + + "Consider reshaping this into a single dimension, " + "using a Tuple action space, or the multi-agent API.") if dist_type is None: dist = DiagGaussian if config.get("squash_to_range"):