diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py
index 369d9db45..41c6db5ba 100644
--- a/python/ray/rllib/agents/ppo/ppo.py
+++ b/python/ray/rllib/agents/ppo/ppo.py
@@ -40,6 +40,9 @@ DEFAULT_CONFIG = with_common_config({
     "entropy_coeff": 0.0,
     # PPO clip parameter
     "clip_param": 0.3,
+    # Clip parameter for the value function. This is sensitive to the scale
+    # of the rewards: if you expect large value estimates (V), increase this.
+    "vf_clip_param": 10.0,
     # Target value for KL divergence
     "kl_target": 0.01,
     # Number of GPUs to use for SGD
diff --git a/python/ray/rllib/agents/ppo/ppo_policy_graph.py b/python/ray/rllib/agents/ppo/ppo_policy_graph.py
index e23f0a5b3..e6fc90d1c 100644
--- a/python/ray/rllib/agents/ppo/ppo_policy_graph.py
+++ b/python/ray/rllib/agents/ppo/ppo_policy_graph.py
@@ -26,6 +26,7 @@ class PPOLoss(object):
                  cur_kl_coeff,
                  entropy_coeff=0,
                  clip_param=0.1,
+                 vf_clip_param=0.1,
                  vf_loss_coeff=1.0,
                  use_gae=True):
         """Constructs the loss for Proximal Policy Objective.
@@ -49,6 +50,7 @@ class PPOLoss(object):
                 coefficient.
             entropy_coeff (float): Coefficient of the entropy regularizer.
             clip_param (float): Clip parameter
+            vf_clip_param (float): Clip parameter for the value function
             vf_loss_coeff (float): Coefficient of the value function loss
             use_gae (bool): If true, use the Generalized Advantage Estimator.
         """
@@ -71,8 +73,8 @@ class PPOLoss(object):
 
         if use_gae:
             vf_loss1 = tf.square(value_fn - value_targets)
-            vf_clipped = vf_preds + tf.clip_by_value(value_fn - vf_preds,
-                                                     -clip_param, clip_param)
+            vf_clipped = vf_preds + tf.clip_by_value(
+                value_fn - vf_preds, -vf_clip_param, vf_clip_param)
             vf_loss2 = tf.square(vf_clipped - value_targets)
             vf_loss = tf.maximum(vf_loss1, vf_loss2)
             self.mean_vf_loss = tf.reduce_mean(vf_loss)
@@ -188,6 +190,7 @@ class PPOPolicyGraph(LearningRateSchedule, TFPolicyGraph):
             self.kl_coeff,
             entropy_coeff=self.config["entropy_coeff"],
             clip_param=self.config["clip_param"],
+            vf_clip_param=self.config["vf_clip_param"],
             vf_loss_coeff=self.config["vf_loss_coeff"],
             use_gae=self.config["use_gae"])
 
diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml
index dcb2775fa..60df6825b 100644
--- a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml
+++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml
@@ -4,7 +4,8 @@ pendulum-ppo:
     run: PPO
     config:
         train_batch_size: 2048
-        num_workers: 4
+        vf_clip_param: 10.0
+        num_workers: 2
         lambda: 0.1
         gamma: 0.95
         lr: 0.0003
diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
index 36830dcd6..8b9d69fce 100644
--- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
+++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
@@ -3,10 +3,10 @@ pendulum-ppo:
     run: PPO
     stop:
         episode_reward_mean: -160
-        # expect -140 within 300-500k steps
         timesteps_total: 600000
     config:
         train_batch_size: 2048
+        vf_clip_param: 10.0
         num_workers: 4
         lambda: 0.1
         gamma: 0.95
@@ -15,4 +15,3 @@ pendulum-ppo:
         num_sgd_iter: 10
         model:
             fcnet_hiddens: [64, 64]
-            squash_to_range: True