diff --git a/rllib/agents/sac/sac.py b/rllib/agents/sac/sac.py
index 91dfbfe4d..ed7f8d89b 100644
--- a/rllib/agents/sac/sac.py
+++ b/rllib/agents/sac/sac.py
@@ -29,26 +29,17 @@ DEFAULT_CONFIG = with_common_config({
     "normalize_actions": True,
 
     # === Learning ===
-    # Update the target by \tau * policy + (1-\tau) * target_policy
+    # Update the target by \tau * policy + (1-\tau) * target_policy.
     "tau": 5e-3,
     # Target entropy lower bound. This is the inverse of reward scale,
     # and will be optimized automatically.
     "target_entropy": "auto",
     # Disable setting done=True at end of episode.
     "no_done_at_end": True,
-    # N-step target updates
+    # N-step target updates.
     "n_step": 1,
-    # === Evaluation ===
-    # The evaluation stats will be reported under the "evaluation" metric key.
-    "evaluation_interval": 1,
-    # Number of episodes to run per evaluation period.
-    "evaluation_num_episodes": 1,
-    # Extra configuration that disables exploration.
-    "evaluation_config": {
-        "explore": False,
-    },
 
-    # Number of env steps to optimize for before returning
+    # Number of env steps to optimize for before returning.
     "timesteps_per_iteration": 100,
 
     # === Replay buffer ===
diff --git a/rllib/evaluation/sample_batch_builder.py b/rllib/evaluation/sample_batch_builder.py
index 5dad3fb8c..073b61f0e 100644
--- a/rllib/evaluation/sample_batch_builder.py
+++ b/rllib/evaluation/sample_batch_builder.py
@@ -95,9 +95,9 @@ class MultiAgentSampleBatchBuilder:
     def total(self):
         """Returns summed number of steps across all agent buffers."""
 
-        return sum(p.count for p in self.policy_builders.values())
+        return sum(a.count for a in self.agent_builders.values())
 
-    def has_pending_data(self):
+    def has_pending_agent_data(self):
         """Returns whether there is pending unprocessed data."""
 
         return len(self.agent_builders) > 0
diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py
index c4eb38f1b..0910a5ec8 100644
--- a/rllib/evaluation/sampler.py
+++ b/rllib/evaluation/sampler.py
@@ -266,7 +266,7 @@ def _env_runner(base_env, extra_batch_callback, policies, policy_mapping_fn,
         if not horizon:
             horizon = (base_env.get_unwrapped()[0].spec.max_episode_steps)
     except Exception:
-        logger.debug("no episode horizon specified, assuming inf")
+        logger.debug("No episode horizon specified, assuming inf.")
     if not horizon:
         horizon = float("inf")
 
@@ -354,6 +354,8 @@ def _process_observations(base_env, policies, batch_builder_pool,
     active_envs = set()
     to_eval = defaultdict(list)
     outputs = []
+    large_batch_threshold = max(1000, unroll_length * 10) if \
+        unroll_length != float("inf") else 5000
 
     # For each environment
     for env_id, agent_obs in unfiltered_obs.items():
@@ -364,18 +366,21 @@ def _process_observations(base_env, policies, batch_builder_pool,
             episode.batch_builder.count += 1
             episode._add_agent_rewards(rewards[env_id])
 
-        if (episode.batch_builder.total() > max(1000, unroll_length * 10)
+        if (episode.batch_builder.total() > large_batch_threshold
                 and log_once("large_batch_warning")):
             logger.warning(
                 "More than {} observations for {} env steps ".format(
                     episode.batch_builder.total(),
                     episode.batch_builder.count) + "are buffered in "
                 "the sampler. If this is more than you expected, check that "
-                "that you set a horizon on your environment correctly. Note "
-                "that in multi-agent environments, `sample_batch_size` sets "
+                "you set a horizon on your environment correctly and that"
+                " it terminates at some point. "
+                "Note: In multi-agent environments, `sample_batch_size` sets "
                 "the batch size based on environment steps, not the steps of "
                 "individual agents, which can result in unexpectedly large "
-                "batches.")
+                "batches. Also, you may be in evaluation waiting for your Env "
+                "to terminate (batch_mode=`complete_episodes`). Make sure it "
+                "does at some point.")
 
         # Check episode termination conditions
         if dones[env_id]["__all__"] or episode.length >= horizon:
@@ -398,7 +403,7 @@ def _process_observations(base_env, policies, batch_builder_pool,
             all_done = False
             active_envs.add(env_id)
 
-        # For each agent in the environment
+        # For each agent in the environment.
         for agent_id, raw_obs in agent_obs.items():
             policy_id = episode.policy_for(agent_id)
             prep_obs = _get_or_raise(preprocessors,
@@ -451,7 +456,7 @@ def _process_observations(base_env, policies, batch_builder_pool,
 
         # Cut the batch if we're not packing multiple episodes into one,
         # or if we've exceeded the requested batch size.
-        if episode.batch_builder.has_pending_data():
+        if episode.batch_builder.has_pending_agent_data():
             if dones[env_id]["__all__"] and not no_done_at_end:
                 episode.batch_builder.check_missing_dones()
             if (all_done and not pack) or \
diff --git a/rllib/tuned_examples/regression_tests/pendulum-sac.yaml b/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
index 47404fb6e..94dee065a 100644
--- a/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
@@ -5,7 +5,6 @@ pendulum-sac:
         episode_reward_mean: -300  # note that evaluation perf is higher
         timesteps_total: 10000
     config:
-        evaluation_interval: 1  # logged under evaluation/* metric keys
         soft_horizon: True
         clip_actions: False
         normalize_actions: True