diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index 5a2e23d6d..1375d8edf 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -61,8 +61,8 @@ class Preprocessor(object): self._obs_space, observation) except AttributeError: raise ValueError( - "Observation for a Box space should be an np.array, " - "not a Python list.", observation) + "Observation for a Box/MultiBinary/MultiDiscrete space " + "should be an np.array, not a Python list.", observation) self._i += 1 @property diff --git a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py index 7931945b7..032dc9c69 100644 --- a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py +++ b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py @@ -47,9 +47,13 @@ class TFMultiGPULearner(LearnerThread): if not num_gpus: self.devices = ["/cpu:0"] elif _fake_gpus: - self.devices = ["/cpu:{}".format(i) for i in range(num_gpus)] + self.devices = [ + "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus))) + ] else: - self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)] + self.devices = [ + "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus))) + ] logger.info("TFMultiGPULearner devices {}".format(self.devices)) assert self.train_batch_size % len(self.devices) == 0 assert self.train_batch_size >= len(self.devices), "batch too small" diff --git a/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py b/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py index 4effcf32c..0a334e84e 100644 --- a/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py +++ b/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py @@ -68,6 +68,11 @@ class SyncBatchReplayOptimizer(PolicyOptimizer): batches = tmp for batch in batches: + if batch.count > self.max_buffer_size: + raise ValueError( + "The size of a single sample batch exceeds the replay " + "buffer size ({} > {})".format(batch.count, + self.max_buffer_size)) self.replay_buffer.append(batch) self.num_steps_sampled += batch.count self.buffer_size += batch.count