diff --git a/doc/source/rllib-toc.rst b/doc/source/rllib-toc.rst
index b52c7e13c..354118325 100644
--- a/doc/source/rllib-toc.rst
+++ b/doc/source/rllib-toc.rst
@@ -33,7 +33,7 @@ Training APIs
 
    -  `Callbacks and Custom Metrics <rllib-training.html#callbacks-and-custom-metrics>`__
 
-   -  `Customized Exploration Behavior (Training and Evaluation) <rllib-training.html#customized-exploration-behavior-training-and-evaluation>`__
+   -  `Customizing Exploration Behavior <rllib-training.html#customizing-exploration-behavior>`__
 
    -  `Customized Evaluation During Training <rllib-training.html#customized-evaluation-during-training>`__
 
diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst
index a90e7fd82..a4d4f1e63 100644
--- a/doc/source/rllib-training.rst
+++ b/doc/source/rllib-training.rst
@@ -520,8 +520,8 @@ Custom metrics can be accessed and visualized like any other training result:
 
 .. image:: custom_metric.png
 
-Customized Exploration Behavior (Training and Evaluation)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Customizing Exploration Behavior
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 RLlib offers a unified top-level API to configure and customize an agent’s
 exploration behavior, including the decisions (how and whether) to sample
@@ -665,9 +665,9 @@ Customized Evaluation During Training
 
 RLlib will report online training rewards, however in some cases you may want to compute
 rewards with different settings (e.g., with exploration turned off, or on a specific set
-of environment configurations). You can evaluate policies during training by setting one
-or more of the ``evaluation_interval``, ``evaluation_num_episodes``, ``evaluation_config``,
-``evaluation_num_workers``, and ``custom_eval_function`` configs
+of environment configurations). You can evaluate policies during training by setting
+the ``evaluation_interval`` config, and optionally also ``evaluation_num_episodes``,
+``evaluation_config``, ``evaluation_num_workers``, and ``custom_eval_function``
 (see `trainer.py <https://github.com/ray-project/ray/blob/master/rllib/agents/trainer.py>`__ for further documentation).
 
 By default, exploration is left as-is within ``evaluation_config``.
@@ -682,9 +682,11 @@ via:
        "explore": False
     }
 
-**IMPORTANT NOTE**: Policy gradient algorithms are able to find the optimal
-policy, even if this is a stochastic one. Setting "explore=False" above
-will result in the evaluation workers not using this optimal policy.
+.. note::
+
+    Policy gradient algorithms are able to find the optimal
+    policy, even if this is a stochastic one. Setting ``"explore": False`` above
+    will result in the evaluation workers not using this stochastic policy.
 
 There is an end to end example of how to set up custom online evaluation in `custom_eval.py <https://github.com/ray-project/ray/blob/master/rllib/examples/custom_eval.py>`__. Note that if you only want to eval your policy at the end of training, you can set ``evaluation_interval: N``, where ``N`` is the number of training iterations before stopping.
 
diff --git a/rllib/agents/sac/sac.py b/rllib/agents/sac/sac.py
index d4f54c5d8..70d5f4f96 100644
--- a/rllib/agents/sac/sac.py
+++ b/rllib/agents/sac/sac.py
@@ -29,6 +29,9 @@ DEFAULT_CONFIG = with_common_config({
     "normalize_actions": True,
 
     # === Learning ===
+    # Disable setting done=True at end of episode. This should be set to True
+    # for infinite-horizon MDPs (e.g., many continuous control problems).
+    "no_done_at_end": False,
     # Update the target by \tau * policy + (1-\tau) * target_policy.
     "tau": 5e-3,
     # Initial value to use for the entropy weight alpha.
@@ -37,8 +40,6 @@ DEFAULT_CONFIG = with_common_config({
     # Discrete(2), -3.0 for Box(shape=(3,))).
     # This is the inverse of reward scale, and will be optimized automatically.
     "target_entropy": "auto",
-    # Disable setting done=True at end of episode.
-    "no_done_at_end": True,
     # N-step target updates.
     "n_step": 1,
 
diff --git a/rllib/tuned_examples/regression_tests/pendulum-sac.yaml b/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
index 94dee065a..8f2a17050 100644
--- a/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
@@ -9,3 +9,4 @@ pendulum-sac:
         clip_actions: False
         normalize_actions: True
         metrics_smoothing_episodes: 5
+        no_done_at_end: True