diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py
index 5a2e23d6d..1375d8edf 100644
--- a/python/ray/rllib/models/preprocessors.py
+++ b/python/ray/rllib/models/preprocessors.py
@@ -61,8 +61,8 @@ class Preprocessor(object):
                         self._obs_space, observation)
             except AttributeError:
                 raise ValueError(
-                    "Observation for a Box space should be an np.array, "
-                    "not a Python list.", observation)
+                    "Observation for a Box/MultiBinary/MultiDiscrete space "
+                    "should be an np.array, not a Python list.", observation)
         self._i += 1
 
     @property
diff --git a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py
index 7931945b7..032dc9c69 100644
--- a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py
+++ b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py
@@ -47,9 +47,13 @@ class TFMultiGPULearner(LearnerThread):
         if not num_gpus:
             self.devices = ["/cpu:0"]
         elif _fake_gpus:
-            self.devices = ["/cpu:{}".format(i) for i in range(num_gpus)]
+            self.devices = [
+                "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
+            ]
         else:
-            self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
+            self.devices = [
+                "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
+            ]
         logger.info("TFMultiGPULearner devices {}".format(self.devices))
         assert self.train_batch_size % len(self.devices) == 0
         assert self.train_batch_size >= len(self.devices), "batch too small"
diff --git a/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py b/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py
index 4effcf32c..0a334e84e 100644
--- a/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py
+++ b/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py
@@ -68,6 +68,11 @@ class SyncBatchReplayOptimizer(PolicyOptimizer):
             batches = tmp
 
             for batch in batches:
+                if batch.count > self.max_buffer_size:
+                    raise ValueError(
+                        "The size of a single sample batch exceeds the replay "
+                        "buffer size ({} > {})".format(batch.count,
+                                                       self.max_buffer_size))
                 self.replay_buffer.append(batch)
                 self.num_steps_sampled += batch.count
                 self.buffer_size += batch.count