diff --git a/python/ray/rllib/utils/tf_policy_graph.py b/python/ray/rllib/utils/tf_policy_graph.py
index 6588060bf..a3dfd174b 100644
--- a/python/ray/rllib/utils/tf_policy_graph.py
+++ b/python/ray/rllib/utils/tf_policy_graph.py
@@ -105,7 +105,7 @@ class TFPolicyGraph(PolicyGraph):
         feed_dict[self._is_training] = True
         for ph, value in zip(self._grads, gradients):
             feed_dict[ph] = value
-        fetches = self.sess.run(
+        fetches = self._sess.run(
             [self._apply_op, self.extra_apply_grad_fetches()],
             feed_dict=feed_dict)
         return fetches[1]